diff options
252 files changed, 67222 insertions, 0 deletions
@@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. @@ -0,0 +1,25 @@ + + Ragel State Machine Compiler -- CREDITS + ======================================= + +* Written by Adrian Thurston <thurston@cs.queensu.ca>. + +* Objective-C output contributed by Eric Ocean. + +* D output and many great ideas contributed by Alan West. + +* Conditionals inspired by David Helder. + +* Java code generation contributions, bug reports, fixes, test cases + and suggestions from Colin Fleming + +* Useful discussion and bug from Carlos Antunes. + +* Feedback, Packaging, and Fixes provided by: + + Bob Tennent, Robert Lemmen, Tobias Jahn, Cris Bailiff, Buddy Betts, Scott + Dixon, Steven Handerson, Michael Somos, Bob Paddock, Istvan Buki, David + Drai, Matthias Rahlf, Zinx Verituse, Markus W. Weissmann, Marc Liyanage, + Eric Ocean, Alan West, Steven Kibbler, Laurent Boulard, Jon Oberheide, + David Helder, Lexington Luthor, Jason Jobe, Colin Fleming, Carlos Antunes, + Steve Horne diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..83795a6 --- /dev/null +++ b/ChangeLog @@ -0,0 +1,1213 @@ +For Next Release +================ + -The '%when condition' syntax was functioning like '$when condition'. This + was fixed. + +Ragel 5.16 - Nov 20, 2006 +========================= + -Bug fix: the fhold and fexec directives did not function correctly in + scanner pattern actions. In this context manipulations of p may be lost or + made invalid. To fix this, fexec and fhold now manipulate tokend, which is + now always used to update p when the action terminates. 
+ +Ragel 5.15 - Oct 31, 2006 +========================= + -A language independent test harness was introduced. Test cases can be + written using a custom mini-language in the embedded actions. This + mini-language is then translated to C, D and Java when generating the + language-specific test cases. + -Several existing tests have been ported to the language-independent format + and a number of new language-independent test cases have been added. + -The state-based embedding operators which access states that are not the + start state and are not final (the 'middle' states) have changed. They + were: + <@/ eof action into middle states + <@! error action into middle states + <@^ local error action into middle states + <@~ to-state action into middle states + <@* from-state action into middle states + They are now: + <>/ eof action into middle states + <>! error action into middle states + <>^ local error action into middle states + <>~ to-state action into middle states + <>* from-state action into middle states + -The verbose form of embeddings using the <- operator has been removed. + This syntax was difficult to remember. + -A new verbose form of state-based embedding operators has been added. + These are like the symbol versions, except they replace the symbols: + / ! ^ ~ * + with literal keywords: + eof err lerr to from + -The following words have been promoted to keywords: + when eof err lerr to from + -The write statement now gets its own lexical scope in the scanner to ensure + that commands are passed through as is (not affected by keywords). + -Bug fix: in the code generation of fret in scanner actions the adjustment to + p that is needed in some cases (dependent on content of patterns) was not + happening. + -The fhold directive, which decrements p, cannot be permitted in the pattern + action of a scanner item because it will not behave consistently. At the end + of a pattern action p could be decremented, set to a new value or left + alone. 
This depends on the contents of the scanner's patterns. The user + cannot be expected to predict what will happen to p. + -Conditions in D code require a cast to the widec type when computing widec. + -Like Java, D code also needs if (true) branches for control flow in actions + in order to fool the unreachable code detector. This is now abstracted in + all code generators using the CTRL_FLOW() function. + -The NULL_ITEM value in java code should be -1. This is needed for + maintaining tokstart. + +Ragel 5.14 - Oct 1, 2006 +======================== + -Fixed the check for use of fcall in actions embedded within longest match + items. It was emitting an error if an item's longest-match action had an + fcall, which is allowed. This bug was introduced while fixing a segfault in + version 5.8. + -A new minimization option was added: MinimizeMostOps (-l). This option + minimizes at every operation except on chains of expressions and chains of + terms (eg, union and concat). On these chains it minimizes only at the last + operation. This makes test cases with many states compile faster, without + killing the performance on grammars like strings2.rl. + -The -l minimization option was made the default. + -Fixes to Java code: Use of the fc value did not work, now fixed. Static data + is now declared with the final keyword. Patch from Colin Fleming. Conditions + now work when generating Java code. + -The option -p was added to rlcodegen which causes printable characters to be + printed in GraphViz output. Patch from Colin Fleming. + -The "element" keyword no longer exists, removed from vim syntax file. + Updated keyword highlighting. + -The host language selection is now made in the frontend. + -Native host language types are now used when specifying the alphtype. + Previously all languages used the set defined by C, and these were mapped to + the appropriate type in the backend. 
+ +Ragel 5.13 - Sep 7, 2006 +======================== + -Fixed a careless error which broke Java code generation. + +Ragel 5.12 - Sep 7, 2006 +======================== + -The -o flag did not work in combination with -V. This was fixed. + -The split code generation format uses only the required number of digits + when writing out the number in the file name of each part. + -The -T0, -F0 and -G0 codegens should write out the action list iteration + variables only when there are regular, to state or from state actions. The + code gens should not use anyActions(). + -If two states have the same EOF actions, they are written out in the finish + routine as one case. + -The split and in-place goto formats would sometimes generate _out when it is + not needed. This was fixed. + -Improved the basic partitioning in the split code gen. The last partition + would sometimes be empty. This was fixed. + -Use of 'fcall *' was not causing top to be initialized. Fixed. + -Implemented a Java backend, specified with -J. Only the table-based format + is supported. + -Implemented range compression in the frontend. This has no effect on the + generated code, however it reduces the work of the backend and any programs + that read the intermediate format. + +Ragel 5.11 - Aug 10, 2006 +========================= + -Added a variable to the configure.in script which allows the building of + the parsers to be turned off (BUILD_PARSERS). Parser building is off by + default for released versions. + -Removed configure tests for bison defines header file. Use --defines=file + instead. + -Configure script doesn't test for bison, flex and gperf when building of the + parsers is turned off. + -Removed check for YYLTYPE structure from configure script. Since shipped + code will not build parsers by default, we don't need to be as accommodating + of other versions of bison. + -Added a missing include that showed up with g++ 2.95.3. + -Failed configure test for Objective-C compiler is now silent. 
+ +Ragel 5.10 - Jul 31, 2006 +========================= + -Moved the check for error state higher in the table-based processing loop. + -Replaced naive implementations of condition searching with proper ones. In + the table-based formats the searching is also table-based. In the directly + executed formats the searching is also directly executable. + -The minimization process was made aware of conditions. + -A problem with the condition implementation was fixed. Previously we were + taking pointers to transitions and then using them after a call to + outTransCopy, which was a bad idea because they may be changed by the call. + -Added test mailbox3.rl which is based on mailbox2.rl but includes conditions + for restricting header and message body lengths. + -Eliminated the initial one-character backup of p just before resuming + execution. + -Added the -s option to the frontend for printing statistics. This currently + includes just the number of states. + -Sped up the generation of the in-place goto-driven (-G2) code style. + -Implemented a split version of in-place goto-driven code style. This code + generation style is suitable for producing fast implementations of very + large machines. Partitioning is currently naive. In the future a + high-quality partitioning program will be employed. The flag for accessing + this feature is -Pn, where n is the number of partitions. + -Converted mailbox1.rl, strings2.rl and cppscan1.rl tests to support the + split code generation. + -Fixes and updates were made to the runtests script: added -c for compiling + only, changed the -me option to -e, and added support for testing the split + code style. + +Ragel 5.9 - Jul 19, 2006 +======================== + -Fixed a bug in the include system which caused malformed output from the + frontend when the include was made from a multi-line machine spec and the + included file ended in a single line spec (or vice versa). + -Static data is now const. 
+ -Actions which referenced states but were not embedded caused the frontend to + segfault, now fixed. + -Manual now built with pdflatex. + -The manual was reorganized and expanded. Chapter sequence is now: + Introduction, Constructing Machines, Embedding Actions, Controlling + Nondeterminism and Interfacing to the Host program. + +Ragel 5.8 - Jun 17, 2006 +======================== + -The internal representation of the alphabet type has been encapsulated + into a class and all operations on it have been defined as C++ operators. + -The condition implementation now supports range transitions. This allows + conditions to be embedded into arbitrary machines. Conditions are still + experimental. + -More condition embedding operators were added + 1. Isolate the start state and embed a condition into all transitions + leaving it: + >when cond OR >?cond + 2. Embed a condition into all transitions: + when cond OR $when cond OR $?cond + 3. Embed a condition into pending out transitions: + %when cond OR %?cond + -Improvements were made to the determinization process to support pending out + conditions. + -The Vim syntax file was fixed so that :> doesn't cause the match of a label. + -The test suite was converted to a single-file format which uses less disk + space than the old directory-per-test format. + +Ragel 5.7 - May 14, 2006 +======================== + -Conditions will not be embedded like actions because they involve a + manipulation of the state machine they are specified in. They have therefore + been taken out of the verbose action embedding form (using the <- compound + symbol). A new syntax for specifying conditions has been created: + m = '\n' when {i==4}; + -Fixed a bug which prevented state machine commands like fcurs, fcall, fret, + etc, from being accounted for in from-state actions and to-state actions. + This prevented some necessary support code from being generated. + -Implemented condition testing in remaining code generators. 
+ -Configure script now checks for gperf, which is required for building. + -Added support for case-insensitive literal strings (in addition to regexes). + A case-insensitive string is made by appending an 'i' to the literal, as in + 'cmd'i or "cmd"i. + -Fixed a bug which caused all or expressions inside of all regular + expressions to be case-insensitive. For example /[fo]o bar/ would make the + [fo] part case-insensitive even though no 'i' was given following the + regular expression. + +Ragel 5.6 - Apr 1, 2006 +======================= + -Added a left-guarded concatenation operator. This operator <: is equivalent + to ( expr1 $1 . expr2 >0 ). It is useful if you want to prefix a sequence + with a sequence of a subset of the characters it matches. For example, one + can consume leading whitespace before tokenizing a sequence of whitespace + separated words: ( ' '* <: ( ' '+ | [a-z]+ )** ) + -Removed context embedding code, which has been dead since 5.0. + +Ragel 5.5 - Mar 28, 2006 +======================== + -Implemented a case-insensitive option for regular expressions: /get/i. + -If no input file is given to the ragel program it reads from standard input. + -The label of the start state has been changed from START to IN to save on + required screen space. + -Bug fix: \0 was not working in literal strings, due to a change that reduced + memory usage by concatenating components of literal strings. Token data + length is now passed from the scanner to the parser so that we do not need to + rely on null termination. + +Ragel 5.4 - Mar 12, 2006 +======================== + -Eliminated the default transition from the frontend implementation. This + default transition was a space-saving optimization that at best could reduce + the number of allocated transitions by one half. Unfortunately it + complicated the implementation and this stood in the way of introducing + conditionals. The default transition may be reintroduced in the future. 
+ -Added entry-guarded concatenation. This operator :>, is syntactic sugar + for expr1 $0 . expr >1. This operator terminates the matching of the first + machine when a first character of the second machine is matched. For + example in any* . ';' we never leave the any* machine. If we use any* :> ';' + then the any* machine is terminated upon matching the semi-colon. + -Added finish-guarded concatenation. This operator :>>, is syntactic sugar + for expr1 $0 . expr @1. This operator is like entry guarded concatenation + except the first machine is terminated when the second machine enters a + final state. This is useful for delaying the guard until a full pattern is + matched. For example as in '/*' any* :>> '*/'. + -Added strong subtraction. Where regular subtraction removes from the first + machine any strings that are matched by the second machine, strong + subtraction removes any strings from the first that contain any strings of + the second as a substring. Strong subtraction is syntactic sugar for + expr1 - ( any* expr2 any* ). + -Eliminated the use of priorities from the examples. Replaced with + subtraction, guarded concatenation and longest-match kleene star. + -Did some initial work on supporting conditional transitions. Far from + complete and very buggy. This code will only be active when conditionals are + used. + +Ragel 5.3 - Jan 27, 2006 +======================== + -Added missing semi-colons that cause the build to fail when using older + versions of Bison. + -Fix for D code: if the contents of an fexec is a single word, the generated + code will get interpreted as a C-style cast. Adding two brackets prevents + this. Can now eliminate the "access this.;" in cppscan5 that was used to + get around this problem. + -Improved some of the tag names in the intermediate format. + -Added unsigned long to the list of supported alphabet types. + -Added ids of actions and action lists to XML intermediate format. Makes it + more human readable. 
+ -Updated to latest Aapl package. + +Ragel 5.2 - Jan 6, 2006 +======================== + -Ragel emits an error if the target of fentry, fcall, fgoto or fnext is inside + a longest match operator, or if an action embedding in a longest match + machine uses fcall. The fcall command can still be used in pattern actions. + -Made improvements to the clang, rlscan, awkemu and cppscan examples. + -Some fixes to generated label names: they should all be prefixed with _. + -A fix to the Vim syntax highlighting script was made. + -Many fixes and updates to the documentation. All important features and + concepts are now documented. A second chapter describing Ragel's use + was added. + +Ragel 5.1 - Dec 22, 2005 +======================== + -Fixes to the matching of section delimiters in Vim syntax file. + -If there is a longest match machine, the tokend var is now initialized by + write init. This is not necessary for correct functionality, however + prevents compiler warnings. + -The rlscan example was ported to the longest match operator and changed to + emit XML data. + -Fix to the error handling in the frontend: if there are errors in the lookup + of names at machine generation time then do not emit anything. + -If not compiling the full machine in the frontend (by using -M), avoid + errors and segfaults caused by names that are not part of the compiled + machine. + -Longest match bug fix: need to init tokstart when returning from fsm calls + that are inside longest match actions. + -In Graphviz drawing, the arrow into the start state is not a real + transition, do not draw to-state actions on the label. + -A bug fix to the handling of non-tag data within an XML tag was made. + -Backend exit value fixed: since the parser now accepts nothing so as to + avoid a redundant parse error when the frontend dies, we must force an + error. The backend should now be properly reporting errors. + -The longest match machine now has its start state set final.
An LM machine + is in a final state when it has not matched anything, when it has matched + and accepted a token and is ready for another, and when it has matched a + token but is waiting for some lookahead before determining what to do about + it (similar to kleene star). + -Element statement removed from some tests. + -Entry point names are propagated to the backend and used to label the entry + point arrows in Graphviz output. + +Ragel 5.0 - Dec 17, 2005 +======================== + (additional details in V5 release notes) + -Ragel has been split into two executables: A frontend which compiles + machines and emits them in an XML format, and a backend which generates code + or a Graphviz dot file from the XML input. The purpose of this split is to + allow Ragel to interface with other tools by means of the XML intermediate + format and to reduce complexity by strictly separating the previously + entangled phases. The intermediate format will provide a better platform + for inspecting compiled machines and for extending Ragel to support other host + languages. + -The host language interface has been reduced significantly. Ragel no longer + expects the machine to be implemented as a structure or class and does not + generate functions corresponding to initialization, execution and EOF. + Instead, Ragel just generates the code of these components, allowing all of + them to be placed in a single function if desired. The user specifies a + machine in the usual manner, then indicates at which place in the program + text the state machine code is to be generated. This is done using the write + statement. It is possible to specify to Ragel how it should access the + variables it needs (such as the current state) using the access statement. + -The host language embedding delimiters have been changed. Single line + machines start with '%%' and end at newline. Multiline machines start with + '%%{' and end with '}%%'.
The machine name is given with the machine + statement at the very beginning of the specification. The purpose of this + change is to make it easier to separate Ragel code from the host language. This + will ease the addition of supported host languages. + -The structure and class parsing which was previously able to extract a + machine's name has been removed since this feature is dependent on the host + language and inhibits the move towards a more language-independent frontend. + -The init, element and interface statements have been made obsolete by the + new host language interface and have been removed. + -The fexec action statement has been changed to take only the new position to + move to. This statement is more useful for moving backwards and reparsing + input than for specifying a whole new buffer entirely and has been shifted + to this new use. Giving it only one argument also simplifies the parsing of + host code embedded in a Ragel specification. This will ease the addition of + supported host languages. + -Introduced the fbreak statement, which allows one to stop processing data + immediately. The machine ends up in the state that the current transition + was to go to. The current character is not changed. + -Introduced the noend option for writing the execute code. This inhibits + checking if we have reached pe. The machine will run until it goes into the + error state or fbreak is hit. This allows one to parse null-terminated + strings without first computing the length. + -The execute code now breaks out of the processing loop when it moves into + the error state. Previously it would run until pe was hit. Breaking out + makes the noend option useful when an error is encountered and allows + user code to determine where in the input the error occurred. It also + eliminates needlessly iterating the input buffer. + -Introduced the noerror, nofinal and noprefix options for writing the machine + data.
The first two inhibit the writing of the error state and the + first-final state should they not be needed. The noprefix eliminates the + prefixing of the data items with the machine name. + -Support for the D language has been added. This is specified in the backend + with the -D switch. + -Since the new host language interface has been reduced considerably, Ragel + no longer needs to distinguish between C-based languages. Support for C, C++ + and Objective-C has been folded into one option in the backend: -C + -The code generator has been made independent of the languages that it + supports by pushing the language dependent aspects down into the lower + levels of the code generator. + -Many improvements to the longest match construction were made. It is no + longer considered experimental. A longest match machine must appear at the + top level of a machine instantiation. Since it does not generate a pure + state machine (it may need to backtrack), it cannot be used as an operand to + other operators. + -References to the current character and current state are now completely + banned in EOF actions. + +Ragel 4.2 - Sep 16, 2005 +======================== + (additional details in V4 release notes) + -Fixed a bug in the longest match operator. In some states it's possible that + we either match a token or match nothing at all. In these states we need to + consult the LmSwitch on error so it must be prepared to execute an error + handler. We therefore need to init act to this error value (which is zero). + We can compute if we need to do this and the code generator emits the + initialization only if necessary. + -Changed the definition of the token end of longest match actions. It now + points to one past the last token. This makes computing the token length + easier because you don't have to add one. The longest match variables token + start, action identifier and token end are now properly initialized in + generated code.
They don't need to be initialized in the user's code. + -Implemented to-state and from-state actions. These actions are executed on + transitions into the state (after the in transition's actions) and on + transitions out of the state (before the out transition's actions). See V4 + release notes for more information. + -Since there are no longer any action embedding operators that embed both on + transitions and on EOF, any actions that exist in both places will be there + because the user has explicitly done so. Presuming this case is rare, and + with code duplication in the hands of the user, we therefore give the EOF + actions their own action switch in the finish() function. This is further + motivated by the fact that the best solution is to do the same for to-state + and from-state actions in the main loop. + -Longest match actions can now be specified using a named action. Since a + word following a longest match item conflicts with the concatenation of a + named machine, the => symbol must come immediately before a named action. + -The longest match operator permits action and machine definitions in the + middle of a longest match construction. These are parsed as if they came + before the machine definition they are contained in. Permitting action and + machine definitions in a longest match construction allows objects to be + defined closer to their use. + -The longest match operator can now handle longest match items with no + action, where previously Ragel segfaulted. + -Updated to Aapl post 2.12. + -Fixed a bug in epsilon transition name lookups. After doing a name lookup + the result was stored in the parse tree. This is wrong because if a machine + is used more than once, each time it may resolve to different targets, + however it will be stored in the same place. We now store name resolutions + in a separated data structure so that each walk of a parse tree uses the + name resolved during the corresponding walk in the name lookup pass. 
+ -The operators used to embed context and actions into states have been + modified. The V4 release notes contain the full details. + -Added zlen builtin machine to represent the zero length machine. Eventually + the name "null" will be phased out in favour of zlen because it is unclear + whether null matches the zero length string or if it does not match any + string at all (as does the empty builtin). + -Added verbose versions of action, context and priority embedding. See the V4 + release notes for the full details. A small example: + machine <- all exec { foo(); } <- final eof act1 + -Bugfix for machines with epsilon ops, but no join operations. I had + wrongfully assumed that because epsilon ops can only increase connectivity, + that no states are ever merged and therefore a call to fillInStates() is not + necessary. In reality, epsilon transitions within one machine can induce the + merging of states. In the following, state 2 follows two paths on 'i': + main := 'h' -> i 'i h' i: 'i'; + -Changed the license of the guide from a custom "do not propagate modified + versions of this document" license to the GPL. + +Ragel 4.1 - Jun 26, 2005 +======================== + (additional details in V4 release notes) + -A bug in include processing was fixed. Surrounding code in an include file + was being passed through to the output when it should be ignored. Includes + are only for including portions of another machine into the current. This + went unnoticed because all tested includes were wrapped in #ifndef ... + #endif directives and so did not affect the compilation of the file making + the include. + -Fixes were made to Vim syntax highlighting file. + -Duplicate actions are now removed from action lists. + -The character-level negation operator ^ was added. This operator produces a + machine that matches single characters that are not matched by the machine + it is applied to. This unary prefix operator has the same precedence level + as !.
+ -The use of + to specify a positive literal number was discontinued. + -The parser now assigns the subtraction operator a higher precedence than + the negation of literal number. + +Ragel 4.0 - May 26, 2005 +======================== + (additional details in V4 release notes) + -Operators now strictly embed into a machine either on a specific class of + characters or on EOF, but never both. This gives a cleaner association + between the operators and the physical state machine entities they operate + on. This change is made up of several parts: + 1. '%' operator embeds only into leaving characters. + 2. All global and local error operators only embed on error character + transitions, their action will not be triggered on EOF in non-final + states. + 3. EOF action embedding operators have been added for all classes of states + to make up for functionality removed from other operators. These are + >/ $/ @/ %/. + 4. Start transition operator '>' no longer implicitly embeds into leaving + transitions when start state is final. + -Ragel now emits warnings about the improper use of statements and values in + action code that is embedded as an EOF action. Warnings are emitted for fpc, + fc, fexec, fbuf and fblen. + -Added a longest match construction operator |* machine opt-action; ... *|. + This is for repetition where an ability to revert to a shorter, previously + matched item is required. This is the same behaviour as flex and re2c. The + longest match operator is not a pure FSM construction, it introduces + transitions that implicitly hold the current character or reset execution to + a previous location in the input. Use of this operator requires the caller + of the machine to occasionally hold onto data after a call to the execute + routine. Use of machines generated with this operator as the input to other + operators may have undefined results. See examples/cppscan for an example. + This is very experimental code.
+ -Action ids are only assigned to actions that are referenced in the final + constructed machine, preventing gaps in the action id sequence. Previously + an action id was assigned if the action was referenced during parsing. + -Machine specifications now begin with %% and are followed with an optional + name and either a single Ragel statement or a sequence of statements + enclosed in {}. + -Ragel no longer generates the FSM's structure or class. It is up to the user + to declare the structure and to give it a variable named curs of type + integer. If the machine uses the call stack the user must also declare an + array of integers named stack and an integer variable named top. + -In the case of Objective-C, Ragel no longer generates the interface or + implementation directives, allowing the user to declare additional methods. + -If a machine specification does not have a name then Ragel tries to find a + name for it by first checking if the specification is inside a struct, class + or interface. If it is not then it uses the name of the previous machine + specification. If still no name is found then this is an error. + -Fsm specifications now persist in memory and statements accumulate. + -Ragel now has an include statement for including the statements of a machine + spec in another file (perhaps because it is the corresponding header file). + The include statement can also be used to draw in the statements of another + fsm spec in the current file. + -The fstack statement is now obsolete and has been removed. + -A new statement, simply 'interface;', indicates that ragel should generate + the machine's interface. If Ragel sees the main machine it generates the + code sections of the machine. Previously, the header portion was generated + if the (now removed) struct statement was found and code was generated if + any machine definition was found. + -Fixed a bug in the resolution of fsm name references in actions.
The name + resolution code did not recurse into inline code items with children + (fgoto*, fcall*, fnext*, and fexec), causing a segfault at code generation + time. + -Cleaned up the code generators. FsmCodeGen was made into a virtual base + class allowing for the language/output-style specific classes to inherit + both a language specific and style-specific base class while retaining only + one copy of FsmCodeGen. Language specific output can now be moved into the + language specific code generators, requiring less duplication of code in the + language/output-style specific leaf classes. + -Fixed bugs in fcall* implementation of IpgGoto code generation. + -If the element type has not been defined Ragel now uses a constant version + of the alphtype, not the exact alphtype. In most cases the data pointer of + the execute routine should be const. A non-const element type can still be + defined with the element statement. + -The fc special value now uses getkey for retrieving the current char rather + than *_p, which is wrong if the element type is a structure. + -User guide converted to TeX and updated for new 4.0 syntax and semantics. + +Ragel 3.7 - Oct 31, 2004 +======================== + -Bug fix: unreferenced machine instantiations causing segfault due to name + tree and parse tree walk becoming out of synchronization. + -Rewrote representation of inline code blocks using a tree data structure. + This allows special keywords such as fbuf to be used as the operands of + other fsm commands. + -Documentation updates. + -When deciding whether or not to generate machine instantiations, search the + entire name tree beneath the instantiation for references, not just the + root. + -Removed stray ';' in keller2.rl + -Added fexec for restarting the machine with new buffer data (state stays the + same), fbuf for retrieving the start of the buf, and fblen for + retrieving the orig buffer length. + -Implemented test/cppscan2 using fexec.
This allows token emitting and restart + to stay inside the execute routine, instead of leaving and re-entering on + every token. + -Changed examples/cppscan to use fexec and thereby go much faster. + -Implemented flex and re2c versions of examples/cppscan. Ragel version + goes faster than flex version but not as fast as re2c version. + -Merged in Objective-C patch from Eric Ocean. + -Turned off syncing with stdio in C++ tests to make them go faster. + -Renamed C++ code generation classes with the Cpp Prefix instead of CC to make + them easier to read. + -In the finish function emit fbuf as 0 cast to a pointer to the element type + so its type is not interpreted as an integer. + -The number -128 underflows char alphabets on some architectures. Removed + uses of it in tests. + -Disabled the keller2 test because it causes problems on many architectures + due to its large size and compilation requirements. + +Ragel 3.6 - Jul 10, 2004 +======================== + -Many documentation updates. + -When resolving names, return a set of values so that a reference in an + action block that is embedded more than once won't report distinct entry + points that are actually the same. + -Implemented flat tables. Stores a linear array of indices into the + transition array and only a low and high key value. Faster than binary + searching for keys but not usable for large alphabets. + -Fixed bug in deleting of transitions leftover from conversion from bst to + list implementation of transitions. Other code cleanup. + -In table based output calculate the cost of using an index. Don't use if + cheaper. + -Changed fstate() value available in init and action code to fentry() to + reflect the fact that the values returned are intended to be used as targets + in fgoto, fnext and fcall statements. The returned state is not a unique + state representing the label. There can be any number of states representing + a label.
+ -Added keller2 test, C++ scanning tests and C++ scanning example. + -In table based output split up transitions into targets and actions. This + allows actions to be omitted. + -Broke the components of the state array into separate arrays. Requires + adding some fields where they could previously be omitted, however allows + finer grained control over the sizes of items and an overall size reduction. + Also means that state numbers are not an offset into the state array but + instead a sequence of numbers, meaning the context array does not have any + wasted bits. + -Action lists and transition also have their types chosen to be the smallest + possible for accommodating the contained values. + -Changed curs state stored in fsm struct from _cs to curs. Keep fsm->curs == + -1 while in machine. Added tests curs1 and curs2. + -Implemented the notion of context. Context can be embedded in states using + >:, $:, @: and %: operators. These embed a named context into start states, + all states, non-start/non-final and final states. If the context is declared + using a context statement + context name; + then the context can be queried for any state using fsm_name_ctx_name(state) + in C code and fsm_name::ctx_name(state) in C++ code. This feature makes it + possible to determine what "part" of the machine is currently active. + -Fixed crash on machine generation of graphs with no final state. If there + is no reference to a final state in a join operation, don't generate one. + -Updated Vim syntax: added labels to inline code, added various C++ keywords. + Don't highlight name separations as labels. Added switch labels, improved + alphtype, element and getkey. + -Fixed line info in error reporting of bad epsilon trans. + -Fixed fstate() for tab code gen. + -Removed references to malloc.h. + +Ragel 3.5 - May 29, 2004 +======================== + -When parse errors occur, the partially generated output file is deleted and + a non-zero exit status is returned.
+ -Updated Vim syntax file. + -Implemented the setting of the element type that is passed to the execute + routine as well as method for specifying how ragel should retrieve the key + from the element type. This lets ragel process arbitrary structures inside + of which is the key that is parsed. + element struct Element; + getkey fpc->character; + -The current state is now implemented with an int across all machines. This + simplifies working with current state variables. For example this allows a + call stack to be implemented in user code. + -Implemented a method for retrieving the current state, the target state, and + any named states. + fcurs -retrieve the current state + ftargs -retrieve the target state + fstate(name) -retrieve a named state. + -Implemented a mechanism for jumping to and calling to a state stored in a + variable. + fgoto *<expr>; -goto the state returned by the C/C++ expression. + fcall *<expr>; -call the state returned by the C/C++ expression. + -Implemented a mechanism for specifying the next state without immediately + transferring control there (any code following statement is executed). + fnext label; -set the state pointed to by label as the next state. + fnext *<expr>; -set the state returned by the C/C++ expression as the + next. + -Action references are determined from the final machine instead of during + the parse tree walk. Some actions can be referenced in the parse tree but not + show up in the final machine. Machine analysis is now done based on this new + computation. + -Named state lookup now employs a breadth-first search in the lookup and + allows the user to fully qualify names, making it possible to specify + jumps/calls into parts of the machine deep in the name hierarchy. Each part + of name (separated by ::) employs a breadth first search from its starting + point. + -Name references now must always refer to a single state.
Since references to + multiple states is not normally intended, it no longer happens + automatically. This frees the programmer from thinking about whether or not + a state reference is unique. It also avoids the added complexity of + determining when to merge the targets of multiple references. The effect of + references to multiple states can be explicitly created using the join + operator and epsilon transitions. + -M option was split into -S and -M. -S specifies the machine spec to generate + for graphviz output and dumping. -M specifies the machine definition or + instantiation. + -Machine function parameters are now prefixed with an underscore to + avoid the hiding of class members. + +Ragel 3.4 - May 8, 2004 +======================= + -Added the longest match kleene star operator **, which is synonymous + with ( ( <machine> ) $0 %1 ) *. + -Epsilon operators distinguish between leaving transitions (going to + another expression in a comma separated list) and non-leaving transitions. + Leaving actions and priorities are appropriately transferred. + -Relative priority of following ops changed to: + 1. Action/Priority + 2. Epsilon + 3. Label + If label is done first then the isolation of the start state in > operators + will cause the label to point to the old start state that doesn't have the + new action/priority. + -Merged >! and >~, @! and @~, %! and %~, and $! and $~ operators to have one + set of global error action operators (>!, @!, %! and $!) that are invoked on + error by unexpected characters as well as by unexpected EOF. + -Added the fpc keyword for use in action code. This is a pointer to the + current character. *fpc == fc. If an action is invoked on EOF then fpc == 0. + -Added >^, @^, %^, and $^ local error operators. Global error operators (>!, + @!, $!, and %!) cause actions to be invoked if the final machine fails. + Local error actions cause actions to be invoked if the current machine + fails.
+ -Changed error operators to mean embed global/local error actions in: + >! and >^ -the start state. + @! and @^ -states that are not the start state and are not final. + %! and %^ -final states. + $! and $^ -all states. + -Added >@! which is synonymous >! then @! + -Added >@^ which is synonymous >^ then @^ + -Added @%! which is synonymous @! then %! + -Added @%^ which is synonymous @^ then %^ + -FsmGraph representation of transition lists was changed from a mapping of + alphabet key -> transition objects using a BST to simply a list of + transition objects. Since the transitions are no longer divided by + single/range, the fast finding of transition objects by key is no longer + required functionality and can be eliminated. This new implementation uses + the same amount of memory however causes fewer allocations. It also makes more + sense for supporting error transitions with actions. Previously an error + transition was represented by a null value in the BST. + -Regular expression ranges are checked to ensure that lower <= upper. + -Added printf-like example. + -Added atoi2, erract2, and gotcallret to the test suite. + -Improved build test to support make -jN and simplified the compiling and + running of tests. + +Ragel 3.3 - Mar 7, 2004 +========================= + -Portability bug fixes were made. Minimum and maximum integer values are + now taken from the system. An alignment problem on 64bit systems + was fixed. + +Ragel 3.2 - Feb 28, 2004 +======================== + -Added a Vim syntax file. + -Eliminated length var from generated execute code in favour of an end + pointer. Using length requires two variables be read and written. Using an + end pointer requires one variable read and written and one read. Results in + more optimizable code. + -Minimization is now on by default. + -States are ordered in output by depth first search. + -Bug in minimization fixed. States were not being distinguished based on + error actions.
+ -Added null and empty builtin machines. + -Added EOF error action operators. These are >~, @~, $~, and %~. EOF error + operators embed actions to take if the EOF is seen and interpreted as an + error. The operators correspond to the following states: + -the start state + -any state with a transition to a final state + -any state with a transition out + -a final state + -Fixed bug in generation of unreferenced machine vars using -M. Unreferenced + vars don't have a name tree built underneath when starting from + instantiations. Need to instead build the name tree starting at the var. + -Calls, returns, holds and references to fc in out action code are now + handled for ipgoto output. + -Only actions referenced by an instantiated machine expression are put into + the action index and written out. + -Added rlscan, an example that lexes Ragel input. + +Ragel 3.1 - Feb 18, 2004 +======================== + -Duplicates in OR literals are removed and no longer cause an assertion + failure. + -Duplicate entry points used in goto and call statements are made into + deterministic entry points. + -Base FsmGraph code moved from aapl into ragel, as an increasing amount + of specialization is required. Too much time was spent attempting to + keep it as a general purpose template. + -FsmGraph code de-templatized and hierarchy squashed to a single class. + -Single transitions taken out of FsmGraph code. In the machine construction + stage, transitions are now implemented only with ranges and default + transitions. This reduces memory consumption, simplifies code and prevents + covered transitions. However it requires the automated selection of single + transitions to keep goto-driven code lean. + -Machine reduction completely rewritten to be in-place. As duplicate + transitions and actions are found and the machine is converted to a format + suitable for writing as C code or as GraphViz input, the memory allocated + for states and transitions is reused, instead of newly allocated.
+ -New reduction code consolidates ranges, selects a default transition, and + selects single transitions with the goal of joining ranges that are split by + any number of single characters. + -Line directive changed from "# <num> <file>" to the more common format + "#line <num> <file>". + -Operator :! changed to @!. This should have happened in last release. + -Added params example. + +Ragel 3.0 - Jan 22, 2004 +======================== + -Ragel now parses the contents of struct statements and action code. + -The keyword fc replaces the use of *p to reference the current character in + action code. + -Machine instantiations other than main are allowed. + -Call, jump and return statements are now available in action code. This + facility makes it possible to jump to an error handling machine, call a + sub-machine for parsing a field or to follow paths through a machine as + determined by arbitrary C code. + -Added labels to the language. Labels can be used anywhere in a machine + expression to define an entry point. Also references to machine definitions + cause the implicit creation of a label. + -Added epsilon transitions to the language. Epsilon operators may reference + labels in the current name scope resolved when join operators are evaluated + and at the root of the expression tree of machine assignment/instantiation. + -Added the comma operator, which joins machines together without drawing any + transitions between them. This operator is useful in combination with + labels, the epsilon operator and user code transitions for defining machines + using the named state and transition list paradigm. It is also useful for + invoking transitions based on some analysis of the input or on the + environment. + -Added >!, :!, $!, %! operators for specifying actions to take should the + machine fail.
These operators embed actions to execute if the machine + fails in + -the start state + -any state with a transition to a final state + -any state with a transition out + -a final state + The general rule is that if an action embedding operator embeds an action + into a set of transitions T, then the error-counterpart with a ! embeds an + action into the error transition taken when any transition T is a candidate, + but does not match the input. + -The finishing augmentation operator ':' has been changed to '@'. This + frees the ':' symbol for machine labels and avoids hacks to the parser to + allow the use of ':' for both labels and finishing augmentations. The best + hack required that label names be distinct from machine definition names as + in main := word : word; This restriction is not good because labels are + local to the machine that they are used in whereas machine names are global + entities. Label name choices should not be restricted by the set of names + that are in use for machines. + -Named priority syntax now requires parentheses surrounding the name and + value pair. This avoids grammar ambiguities now that the ',' operator has + been introduced and makes it more clear that the name and value are an + associated pair. + -Backslashes are escaped in line directive paths. + +Ragel 2.2 - Oct 6, 2003 +======================= + -Added {n}, {,n}, {n,} {n,m} repetition operators. + <expr> {n} -- exactly n repetitions + <expr> {,n} -- zero to n repetitions + <expr> {n,} -- n or more repetitions + <expr> {n,m} -- n to m repetitions + -Bug in binary search table in Aapl fixed. Fixes crashing on machines that + add to action tables that are implicitly shared among transitions. + -Tests using obsolete minimization algorithms are no longer built and run by + default. + -Added atoi and concurrent from examples to the test suite. + +Ragel 2.1 - Sep 22, 2003 +======================== + -Bug in priority comparison code fixed.
Segfaulted on some input with many + embedded priorities. + -Added two new examples. + +Ragel 2.0 - Sep 7, 2003 +======================= + -Optional (?), One or More (+) and Kleene Star (*) operators changed from + prefix to postfix. Rationale is that postfix version is far more common in + regular expression implementations and will be more readily understood. + -All priority values attached to transitions are now accompanied by a name. + Transitions no longer have default priority values of zero assigned + to them. Only transitions that have different priority values assigned + to the same name influence the NFA-DFA conversion. This scheme reduces + side-effects of priorities. + -Removed the %! statement for unsetting pending out priorities. With + named priorities, it is not necessary to clear the priorities of a + machine with $0 %! because non-colliding names can be used to avoid + side-effects. + -Removed the clear keyword, which was for removing actions from a machine. + Not required functionality and it is non-intuitive to have a language + feature that undoes previous definitions. + -Removed the ^ modifier to repetition and concatenation operators. This + undocumented feature prevented out transitions and out priorities from being + transferred from final states to transitions leaving machines. Not required + functionality and complicates the language unnecessarily. + -Keyword 'func' changed to 'action' as a part of the phasing out of the term + 'function' in favour of 'action'. Rationale is that the term 'function' + implies that the code is called like a C function, which is not necessarily + the case. The term 'action' is far more common in state machine compiler + implementations. + -Added the instantiation statement, which looks like a standard variable + assignment except := is used instead of =. Instantiations go into the + same graph dictionary as definitions.
In the future, instantiations + will be used as the target for gotos and calls in action code. + -The main graph should now be explicitly instantiated. If it is not, + a warning is issued. + -Or literal basic machines ([] outside of regular expressions) now support + negation and ranges. + -C and C++ interfaces lowercased. In the C interface an underscore now + separates the fsm machine and the function name. Rationale is that lowercased + library and generated routines are more common. + C output: + int fsm_init( struct clang *fsm ); + int fsm_execute( struct clang *fsm, char *data, int dlen ); + int fsm_finish( struct clang *fsm ); + C++ output: + int fsm::init( ); + int fsm::execute( char *data, int dlen ); + int fsm::finish( ); + -Init, execute and finish all return -1 if the machine is in the error state + and can never accept, 0 if the machine is in a non-accepting state that has a + path to a final state and 1 if the machine is in an accepting state. + -Accept routine eliminated. Determining whether or not the machine accepts is + done by examining the return value of the finish routine. + -In C output, fsm structure is no longer a typedef, so referencing requires + the struct keyword. This is to stay in line with C language conventions. + -In C++ output, constructor is no longer written by ragel. As a consequence, + init routine is not called automatically. Allows constructor to be supplied + by user as well as the return value of init to be examined without calling it + twice. + -Static start state and private structures are taken out of C++ classes. + +Ragel 1.5.4 - Jul 14, 2003 +========================== + -Workaround for building with bison 1.875, which produces an + optimization that doesn't build with newer version gcc. + +Ragel 1.5.3 - Jul 10, 2003 +========================== + -Fixed building with versions of flex that recognize YY_NO_UNPUT. + -Fixed version numbers in ragel.spec file.
+ +Ragel 1.5.2 - Jul 7, 2003 +========================= + -Transition actions and out actions displayed in the graphviz output. + -Transitions on negative numbers handled in graphviz output. + -Warning generated when using bison 1.875 now squashed. + +Ragel 1.5.1 - Jun 21, 2003 +========================== + -Bugs fixed: Don't delete the output objects when writing to standard out. + Copy mem into parser buffer with memcpy, not strcpy. Fixes buffer mem error. + -Fixes for compiling with Sun WorkShop 6 compilers. + +Ragel 1.5.0 - Jun 10, 2003 +========================== + -Line directives written to the output so that errors in the action code + are properly reported in the ragel input file. + -Simple graphviz dot file output format is supported. Shows states and + transitions. Does not yet show actions. + -Options -p and -f dropped in favour of -d output format. + -Added option -M for specifying the machine to dump with -d or the graph to + generate with -V. + -Error recovery implemented. + -Proper line and column number tracking implemented in the scanner. + -All action/function code is now embedded in the main Execute routine. Avoids + duplication of action code in the Finish routine and the need to call + ExecFuncs which resulted in huge code bloat. Will also allow actions to + modify cs when fsm goto, call and return is supported in action code. + -Fsm spec can have no statements, nothing will be generated. + -Bug fix: Don't accept ] as the opening of a .-. range in a reg exp. + -Regular expression or set ranges (ie /[0-9]/) are now handled by the parser + and consequently must be well-formed. The following now generates a parser + error: /[+-]/ and must be rewritten as /[+\-]/. Also fixes a bug whereby ] + might be accepted as the opening of a .-. range causing /[0-9]-[0-9]/ to + parse incorrectly. + -\v, \f, and \r are now treated as whitespace in an fsm spec. + +Ragel 1.4.1 - Nov 19, 2002 +========================== + -Compile fixes.
The last release (integer alphabets) was so exciting + that usual portability checks got bypassed. + +Ragel 1.4.0 - Nov 19, 2002 +========================== + -Arbitrary integer alphabets are now fully supported! A new language + construct: + 'alphtype <type>' added for specifying the type of the alphabet. Default + is 'char'. Possible alphabet types are: + char, unsigned char, short, unsigned short, int, unsigned int + -Literal machines specified in decimal format can now be negative when the + alphabet is a signed type. + -Literal machines (strings, decimal and hex) have their values checked for + overflow/underflow against the size of the alphabet type. + -Table driven and goto driven output redesigned to support ranges. Table + driven uses a binary search for locating single characters and ranges. Goto + driven uses a switch statement for single characters and nested if blocks for + ranges. + -Switch driven output removed due to a lack of consistent advantages. Most of + the time the switch driven FSM is of no use because the goto FSM makes + smaller and faster code. Under certain circumstances it can produce smaller + code than a goto driven fsm and be almost as fast, but some sporadic case + does not warrant maintaining it. + -Many warnings changed to errors. + -Added option -p for printing the final fsm before minimization. This lets + priorities be seen. Priorities are all reset to 0 before minimization. The + existing option -f prints the final fsm after minimization. + -Fixed a bug in the clang test and example that resulted in redundant actions + being executed. + +Ragel 1.3.4 - Nov 6, 2002 +========================= + -Fixes to Chapter 1 of the guide. + -Brought back the examples and made them current. + -MSVC is no longer supported for compiling windows binaries because its + support for the C++ standard is frustratingly inadequate, it will cost money + to upgrade if it ever gets better, and MinGW is a much better alternative.
+ -The build system now supports the --host= option for building ragel + for another system (used for cross compiling a windows binary with MinGW). + -Various design changes and fixes towards the goal of arbitrary integer + alphabets and the handling of larger state machines were made. + -The new shared vector class is now used for action lists in transitions and + states to reduce memory allocations. + -An avl tree is now used for the reduction of transitions and functions of an + fsm graph before making the final machine. The tree allows better scalability + and performance by not requiring consecutively larger heap allocations. + -Final stages in the separation of fsm graph code from action embedding and + priority assignment is complete. Makes the base graph leaner and easier to reuse + in other projects (like Keller). + +Ragel 1.3.3 - Oct 22, 2002 +========================== + -More diagrams were added to section 1.7.1 of the user guide. + -FSM Graph code was reworked to separate the regex/nfa/minimization graph + algorithms from the manipulation of state and transition properties. + -An rpm spec file from Cris Bailiff was added. This allows an rpm for ragel + to be built with the command 'rpm -ta ragel-x.x.x.tar.gz' + -Fixes to the build system and corresponding doc updates in the README. + -Removed autil and included the one needed source file directly in the top + level ragel directory. + -Fixed a bug that nullified the 20 times speedup in large compilations + claimed by the last version. + -Removed awk from the doc build (it was added with the last release -- though + not mentioned in the changelog). + -Install of man page was moved to the doc dir. The install also installs the + user guide to $(PREFIX)/share/doc/ragel/ + +Ragel 1.3.2 - Oct 16, 2002 +========================== + -Added option -v (or --version) to show version information. + -The subtract operator no longer removes transition data from the machine + being subtracted.
This is left up to the user for the purpose of making it + possible to transfer transitions using subtract and also for speeding up the + subtract routine. Note that it is possible to explicitly clear transition + data before doing a subtract. + -Rather severe typo bug fixed. Bug was related to transitions with higher + priorities taking precedence. A wrong ptr was being returned. It appears to + have worked most of the time because the old ptr was deleted and the new one + allocated immediately after so the old ptr often pointed to the same space. + Just luck though. + -Bug in the removing of dead end paths was fixed. If the start state + has in transitions then those paths were not followed when finding states to + keep. Would result in non-dead end states being removed from the graph. + -In lists and in ranges are no longer maintained as a bst with the key as the + alphabet character and the value as a list of transitions coming in on that + char. There is one list for each of inList, inRange and inDefault. Now that + the required functionality of the graph is well known it is safe to remove + these lists to gain in speed and footprint. They shouldn't be needed. + -IsolateStartState() runs on modification of start data only if the start + state is not already isolated, which is now possible with the new in list + representation. + -Concat, Or and Star operators now use an approximation to + removeUnreachableStates that does not require a traversal of the entire + graph. This combined with an 'on-the-fly' management of final bits and final + state status results in a dramatic speed increase when compiling machines + that use those operators heavily. The strings2 test goes 20 times faster. + -Before the final minimization, after all fsm operations are complete, + priority data is reset which enables better minimization in cases where + priorities would otherwise separate similar states.
+ +Ragel 1.3.1 - Oct 2, 2002 +========================= + -Range transitions are now used to implement machines made with /[a-z]/ and + the .. operator as well as most of the builtin machines. The ranges are not + yet reflected in the output code, they are expanded as if they came from the + regular single transitions. This is one step closer to arbitrary integer + output. + -The builtin machine 'any' was added. It is equiv to the builtin extend, + matching any characters. + -The builtin machine 'cntrl' now includes newline. + -The builtin machine 'space' now includes newline. + -The builtin machine 'ascii' is now the range 0-127, not all characters. + -A man page was written. + -A proper user guide was started. Chapter 1: Specifying Ragel Programs + was written. It even has some diagrams :) + +Ragel 1.3.0 - Sept 4, 2002 +========================== + -NULL keyword no longer used in table output. + -Though not yet in use, underlying graph structure changed to support range + transitions. As a result, most of the code that walks transition lists is now + implemented with an iterator that hides the complexity of the transition + lists and ranges. Range transitions will be used to implement /[a-z]/ style + machines and machines made with the .. operator. Previously a single + transition would be used for each char in the range, which is very costly. + Ranges eliminate much of the space complexity and allow for the .. operator + to be used with very large (integer) alphabets. + -New minimization similar to Hopcroft's alg. It does not require n^2 space and + runs close to O(n*log(n)) (an exact analysis of the alg is very hard). It is + much better than the stable and approx minimization and obsoletes them both. + An exact implementation of Hopcroft's alg is desirable but not possible + because the ragel implementation does not assume a finite alphabet, which + Hopcroft's requires. 
Ragel will support arbitrary integer alphabets which + must be treated as an infinite set for implementation considerations. + -New option -m using above described minimization to replace all previous + minimization options. Old options still work but are obsolete and not + advertised with -h. + -Bug fixed in goto style output. The error exit set the current state to 0, + which is actually a valid state. If the machine was entered again it would go + into the first state, very wrong. If the first state happened to be final then + an immediate finish would accept when in fact it should fail. + -Slightly better fsm minimization now capable due to clearing of the + transition ordering numbers just prior to minimization. + +Ragel 1.2.2 - May 25, 2002 +========================== + -Configuration option --prefix now works when installing. + -cc file extension changed to cpp for better portability. + -Unlink of output file upon error no longer happens, removes dependency on + unlink system command. + -All multiline strings removed: not standard c++. + -Awk build dependency removed. + -MSVC 6.0 added to the list of supported compilers (with some tweaking of + bison and flex output). + +Ragel 1.2.1 - May 13, 2002 +========================== + -Automatic dependencies were fixed, they were not working correctly. + -Updated AUTHORS file to reflect contributors. + -Code is more C++ standards compliant: compiles with g++ 3.0 + -Fixed bugs that only showed up in g++ 3.0 + -Latest (unreleased) Aapl. + -Configuration script bails out if bison++ is installed. Ragel will not + compile with bison++ because it is coded in c++ and bison++ automatically + generates a c++ parser. Ragel uses a c-style bison parser. + +Ragel 1.2.0 - May 3, 2002 +========================= + -Underlying graph structure now supports default transitions. The result is + that a transition does not need to be made for each char of the alphabet + when making 'extend' or '/./' machines.
Ragel compiles machines that + use the aforementioned primitives WAY faster. + -The ugly hacks needed to pick default transitions now go away due to + the graph supporting default transitions directly. + -If -e is given, but minimization is not turned on, print a warning. + -Makefiles use automatic dependencies. + +Ragel 1.1.0 - April 15, 2002 +============================ + -Added goto fsm: much faster than any other fsm style. + -Default operator (if two machines are side by side with no operator + between them) is concatenation. First showed up in 1.0.4. + -The fsm machine no longer automatically builds the flat table for + transition indices. Instead it keeps the key,ptr pair. In tabcodegen + the flat table is produced. This way very large alphabets with sparse + transitions will not consume large amounts of mem. This is also in prep + for fsm graph getting a default transition. + -Generated code contains a statement explicitly stating that ragel fsms + are NOT covered by the GPL. Technically, Ragel copies part of itself + to the output to make the generic fsm execution routine (for table driven + fsms only) and so the output could be considered under the GPL. But this + code is very trivial and could easily be rewritten. The actual fsm data + is subject to the copyright of the source. To promote the use of Ragel, + a special exception is made for the part of the output copied from Ragel: + it may be used without restriction. + -Much more elegant code generation scheme is employed. Code generation + class members need only put the 'codegen' keyword after their 'void' type + in order to be automatically registered to handle macros of the same name. + An awk script recognises this keyword and generates an appropriate driver. + -Ragel gets a test suite. + -Postfunc and prefunc go away because they are not supported by non + loop-driven fsms (goto, switch) and present duplicate functionality. + Universal funcs can be implemented by using $ operator.
+ -Automatic dependencies used in build system, no more make depend target. + -Code generation section in docs. + -Uses the latest aapl. + +Ragel 1.0.5 - March 3, 2002 +=========================== + -Bugfix in SetErrorState that caused an assertion failure when compiling + simple machines that did not have full transition tables (and thus did + not show up on any example machines). Assertion failure did not occur + when using the switch statement code as ragel does not call SetErrorState + in that case. + -Fixed some missing includes, now compiles on redhat. + -Moved the FsmMachTrans Compare class out of FsmMachTrans. Some compilers + don't deal with nested classes in templates too well. + -Removed old unused BASEREF in fsmgraph and ragel now compiles using + egcs-2.91.66 and presumably SUNWspro. The baseref is no longer needed + because states do not support being elements in multiple lists. I would + rather be able to support more compilers than have this feature. + -Started a README with compilation notes. Started an AUTHORS file. + -Started the user documentation. Describes basic machines and operators. + +Ragel 1.0.4 - March 1, 2002 +=========================== + -Ported to the version of Aapl just after 2.2.0 release. See + http://www.ragel.ca/aapl/ for details on aapl. + -Fixed a bug in the clang example: the newline machine was not starred. + -Added explanations to the clang and mailbox examples. This should + help people that want to learn the language as the manual is far from + complete. + +Ragel 1.0.3 - Feb 2, 2002 +========================= + -Added aapl to the ragel tree. No longer requires you to download + and build aapl separately. Should avoid discouraging impatient users + from compiling ragel. + -Added the examples to the ragel tree. + -Added configure script checks for bison and flex. + -Fixed makefile so as not to die with newer versions of bison that + write the header of the parser to a .hh file. + -Started ChangeLog file.
+ +Ragel 1.0.2 - Jan 30, 2002 +========================== + -Bug fix in calculating highIndex for table based code. Was using + the length of out transition table rather than the value at the + end. + -If high/low index are at the limits, output a define in their place, + not the high/low values themselves so as not to cause compiler warnings. + -If the resulting machines don't have any indices or functions, then + omit the empty unreferenced static arrays so as not to cause compiler + warnings about unused static vars. + -Fixed variable sized indices support. The header cannot have any + reference to INDEX_TYPE as that info is not known at the time the header + data is written. Forces us to use a void * for pointers to indices. In + the c++ versions we are forced to make much of the data non-member + static data in the code portion for the same reason. + +Ragel 1.0.1 - Jan 28, 2002 +========================== + -Exe name change from reglang to ragel. + -Added ftabcodegen output code style which uses a table for states and + transitions but uses a switch statement for the function execution. + -Reformatted options in usage dump to look better. + -Support escape sequences in [] sections of regular expressions. + +Ragel 1.0 - Jan 25, 2002 +======================== + -Initial release. diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 0000000..9b16e8e --- /dev/null +++ b/Makefile.in @@ -0,0 +1,56 @@ +# +# Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> +# + +# This file is part of Ragel. +# +# Ragel is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Ragel is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ragel; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +SUBDIRS = common ragel rlcodegen test examples doc + +#************************************* + +# Programs +CXX = @CXX@ + +# Get the version info. +include version.mk + +# Rules. +all: + @cd common && $(MAKE) && cd ../ragel && $(MAKE) && cd ../rlcodegen && $(MAKE) + +new-version: + sed 's/^\(Version:[[:space:]]*\)[0-9.]*$$/\1$(VERSION)/' ragel.spec > spec-new + cat spec-new > ragel.spec && rm spec-new + +distclean: distclean-rec distclean-local + +distclean-rec: + for dir in $(SUBDIRS); do cd $$dir; $(MAKE) distclean; cd ..; done + +distclean-local: clean-local + rm -f Makefile config.cache config.status config.log + +clean: clean-rec clean-local + +clean-rec: + for dir in $(SUBDIRS); do cd $$dir; $(MAKE) clean; cd ..; done + +clean-local: + rm -f tags + +install: + @cd ragel && $(MAKE) install && cd ../rlcodegen && $(MAKE) install @@ -0,0 +1,54 @@ + + Ragel State Machine Compiler -- README + ====================================== + +1. Build Requirements +--------------------- + + * GNU Make + * g++ + +If you would like to modify Ragel and need to build Ragel's scanners and +parsers from the specifications then set BUILD_PARSERS=true in the configure +script and then run it. To build the parsers you will need the following +programs: + + * flex + * bison (recent version and not bison++, see below) + * gperf + +To build the user guide the following extra programs are needed: + + * fig2dev + * pdflatex + + +2. Compilation +-------------- + +To configure type './configure'. The makefiles honour the --prefix option to +specify where the program is to be installed to. + +To build the ragel program type 'make'. + +To build all the documentation cd to 'doc' and type 'make'. 
If you don't have +all of the programs to build the user guide and just want the man page use +'make ragel.1 rlcodegen.1'. + + +3. Installing +------------- + +The command 'make install' will build the programs and install them to $PREFIX/bin/. +A 'make install' in the doc directory will make and install all the +documentation. The man pages install to $PREFIX/man/man1/ and the user guide +and ChangeLog install to $PREFIX/share/doc/ragel/. To install just the man page +use 'make man-install'. + + +4. Why Ragel cannot be built with Bison++ +----------------------------------------- +Ragel is written in C++ using a C-style parser. Bison++ sees that we are using +C++ and generates classes, which breaks the build. As of last investigation, +this can't be stopped. Bison++ is therefore only compatible with Bison if you +are implementing a C-style parser in C. @@ -0,0 +1,48 @@ +fbreak should advance the current char. Deprecate fbreak and add + fctl_break; + fctl_return <expr>; + fctl_goto <label>; + +It should be possible to import/export definitions. + +If a scanner can be optimized into a pure state machine, maybe permit it to be +referenced as a machine definition. Alternately: inline scanners with an +explicit exit pattern. + +The split codegen needs a profiler connected to a graph partitioning algorithm. + +Die a graceful death when rlcodegen -F receives large alphabets. + +It's not currently possible to have more than one machine in a single function +because of label conflicts. Labels should have a unique prefix. + +Emit a warning when a subtraction has no effect. + +Emit a warning when unnamed priorities are used in longest match machines. +These priorities may unexpectedly interact across longest-match items. Changing +the language such that unwanted interaction cannot happen would require naming +longest-match items. + +Testing facilities: Quick easy way to query which strings are accepted. +Enumerate all accepted strings. From Nicholas Maxwell Lester.
+ +Add more examples, add more tests and write more documentation. + +A debugger would be nice. Ragel could emit a special debug version that +prompted for debug commands that allowed the user to step through the machine +and get details about where they are in their RL. + +Frontend should allow the redefinition of fsm section delimiters. + +Do more to obscure ragel's private variables. Just a leading underscore is not +enough. Maybe something more like __ri__. + +Some talk about capturing data: + +Separate tokstart/tokend from the backtracking. One var for preservation, +called preserve. Write declarations; produces the necessary variables used by +ragel. Move pattern start pattern end concepts into the general? The +variables which may need to influence the preserve is dependent on the state. +States have a concept of which variables are in use. Can be used for length +restrictions. If there is an exit pattern, it is the explicit way out, +otherwise the start state and all final states are a way out. diff --git a/aapl/COPYING b/aapl/COPYING new file mode 100644 index 0000000..c6ed510 --- /dev/null +++ b/aapl/COPYING @@ -0,0 +1,502 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users.
+ + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. 
Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. 
+ + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". 
+ + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. 
+ + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! diff --git a/aapl/README b/aapl/README new file mode 100644 index 0000000..a2fa5e6 --- /dev/null +++ b/aapl/README @@ -0,0 +1,6 @@ +This directory contains the Aapl source distribution. For the +documentation, build scripts, test programs, ChangeLog, etc. get the +aapldev package. 
+ +AaplDev and other information about Aapl is available from +http://www.elude.ca/aapl/ diff --git a/aapl/avlbasic.h b/aapl/avlbasic.h new file mode 100644 index 0000000..780ef07 --- /dev/null +++ b/aapl/avlbasic.h @@ -0,0 +1,65 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLBASIC_H +#define _AAPL_AVLBASIC_H + +#include "compare.h" + +/** + * \addtogroup avltree + * @{ + */ + +/** + * \class AvlBasic + * \brief AVL Tree in which the entire element structure is the key. + * + * AvlBasic is an AVL tree that does not distinguish between the element that + * it contains and the key. The entire element structure is the key that is + * used to compare the relative ordering of elements. This is similar to the + * BstSet structure. + * + * AvlBasic does not assume ownership of elements in the tree. Items must be + * explicitly de-allocated. 
+ */ + +/*@}*/ + +#define BASE_EL(name) name +#define BASEKEY(name) name +#define AVLMEL_CLASSDEF class Element, class Compare +#define AVLMEL_TEMPDEF class Element, class Compare +#define AVLMEL_TEMPUSE Element, Compare +#define AvlTree AvlBasic +#define AVL_BASIC + +#include "avlcommon.h" + +#undef BASE_EL +#undef BASEKEY +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef AVL_BASIC + +#endif /* _AAPL_AVLBASIC_H */ diff --git a/aapl/avlcommon.h b/aapl/avlcommon.h new file mode 100644 index 0000000..1984531 --- /dev/null +++ b/aapl/avlcommon.h @@ -0,0 +1,1622 @@ +/* + * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* This header is not wrapped in ifndef because it is not intended to + * be included by the user. */ + +#include <assert.h> + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +#ifdef WALKABLE +/* This is used by AvlTree, AvlMel and AvlMelKey so it + * must be protected by global ifdefs. */ +#ifndef __AAPL_AVLI_EL__ +#define __AAPL_AVLI_EL__ + +/** + * \brief Tree element properties for linked AVL trees. + * + * AvliTreeEl needs to be inherited by classes that intend to be element in an + * AvliTree.
+ */ +template<class SubClassEl> struct AvliTreeEl +{ + /** + * \brief Tree pointers connecting element in a tree. + */ + SubClassEl *left, *right, *parent; + + /** + * \brief Linked list pointers. + */ + SubClassEl *prev, *next; + + /** + * \brief Height of the tree rooted at this element. + * + * Height is required by the AVL balancing algorithm. + */ + long height; +}; +#endif /* __AAPL_AVLI_EL__ */ + +#else /* not WALKABLE */ + +/* This is used by All the non walkable trees so it must be + * protected by a global ifdef. */ +#ifndef __AAPL_AVL_EL__ +#define __AAPL_AVL_EL__ +/** + * \brief Tree element properties for linked AVL trees. + * + * AvlTreeEl needs to be inherited by classes that intend to be element in an + * AvlTree. + */ +template<class SubClassEl> struct AvlTreeEl +{ + /** + * \brief Tree pointers connecting element in a tree. + */ + SubClassEl *left, *right, *parent; + + /** + * \brief Height of the tree rooted at this element. + * + * Height is required by the AVL balancing algorithm. + */ + long height; +}; +#endif /* __AAPL_AVL_EL__ */ +#endif /* def WALKABLE */ + + +#if defined( AVLTREE_MAP ) + +#ifdef WALKABLE + +/** + * \brief Tree element for AvliMap + * + * Stores the key and value pair. + */ +template <class Key, class Value> struct AvliMapEl : + public AvliTreeEl< AvliMapEl<Key, Value> > +{ + AvliMapEl(const Key &key) + : key(key) { } + AvliMapEl(const Key &key, const Value &value) + : key(key), value(value) { } + + const Key &getKey() const { return key; } + + /** \brief The key. */ + Key key; + + /** \brief The value. */ + Value value; +}; +#else /* not WALKABLE */ + +/** + * \brief Tree element for AvlMap + * + * Stores the key and value pair. + */ +template <class Key, class Value> struct AvlMapEl : + public AvlTreeEl< AvlMapEl<Key, Value> > +{ + AvlMapEl(const Key &key) + : key(key) { } + AvlMapEl(const Key &key, const Value &value) + : key(key), value(value) { } + + const Key &getKey() const { return key; } + + /** \brief The key. 
*/ + Key key; + + /** \brief The value. */ + Value value; +}; +#endif /* def WALKABLE */ + +#elif defined( AVLTREE_SET ) + +#ifdef WALKABLE +/** + * \brief Tree element for AvliSet + * + * Stores the key. + */ +template <class Key> struct AvliSetEl : + public AvliTreeEl< AvliSetEl<Key> > +{ + AvliSetEl(const Key &key) : key(key) { } + + const Key &getKey() const { return key; } + + /** \brief The key. */ + Key key; +}; +#else /* not WALKABLE */ +/** + * \brief Tree element for AvlSet + * + * Stores the key. + */ +template <class Key> struct AvlSetEl : + public AvlTreeEl< AvlSetEl<Key> > +{ + AvlSetEl(const Key &key) : key(key) { } + + const Key &getKey() const { return key; } + + /** \brief The key. */ + Key key; +}; +#endif /* def WALKABLE */ + +#endif /* AVLTREE_SET */ + +/* Common AvlTree Class */ +template < AVLMEL_CLASSDEF > class AvlTree +#if !defined( AVL_KEYLESS ) && defined ( WALKABLE ) + : public Compare, public BASELIST +#elif !defined( AVL_KEYLESS ) + : public Compare +#elif defined( WALKABLE ) + : public BASELIST +#endif +{ +public: + /** + * \brief Create an empty tree. + */ +#ifdef WALKABLE + AvlTree() : root(0), treeSize(0) { } +#else + AvlTree() : root(0), head(0), tail(0), treeSize(0) { } +#endif + + /** + * \brief Perform a deep copy of the tree. + * + * Each element is duplicated for the new tree. Copy constructors are used + * to create the new elements. + */ + AvlTree(const AvlTree &other); + +#if defined( AVLTREE_MAP ) || defined( AVLTREE_SET ) + /** + * \brief Clear the contents of the tree. + * + * All element are deleted. + */ + ~AvlTree() { empty(); } + + /** + * \brief Perform a deep copy of the tree. + * + * Each element is duplicated for the new tree. Copy constructors are used + * to create the new element. If this tree contains items, they are first + * deleted. + * + * \returns A reference to this. + */ + AvlTree &operator=( const AvlTree &tree ); + + /** + * \brief Transfer the elements of another tree into this. 
+ * + * First deletes all elements in this tree. + */ + void transfer( AvlTree &tree ); +#else + /** + * \brief Abandon all elements in the tree. + * + * Tree elements are not deleted. + */ + ~AvlTree() {} + + /** + * \brief Perform a deep copy of the tree. + * + * Each element is duplicated for the new tree. Copy constructors are used + * to create the new element. If this tree contains items, they are + * abandoned. + * + * \returns A reference to this. + */ + AvlTree &operator=( const AvlTree &tree ); + + /** + * \brief Transfer the elements of another tree into this. + * + * All elements in this tree are abandoned first. + */ + void transfer( AvlTree &tree ); +#endif + +#ifndef AVL_KEYLESS + /* Insert a element into the tree. */ + Element *insert( Element *element, Element **lastFound = 0 ); + +#ifdef AVL_BASIC + /* Find a element in the tree. Returns the element if + * element exists, false otherwise. */ + Element *find( const Element *element ) const; + +#else + Element *insert( const Key &key, Element **lastFound = 0 ); + +#ifdef AVLTREE_MAP + Element *insert( const Key &key, const Value &val, + Element **lastFound = 0 ); +#endif + + /* Find a element in the tree. Returns the element if + * key exists, false otherwise. */ + Element *find( const Key &key ) const; + + /* Detach a element from the tree. */ + Element *detach( const Key &key ); + + /* Detach and delete a element from the tree. */ + bool remove( const Key &key ); +#endif /* AVL_BASIC */ +#endif /* AVL_KEYLESS */ + + /* Detach a element from the tree. */ + Element *detach( Element *element ); + + /* Detach and delete a element from the tree. */ + void remove( Element *element ); + + /* Free all memory used by tree. */ + void empty(); + + /* Abandon all element in the tree. Does not delete element. */ + void abandon(); + + /** Root element of the tree. */ + Element *root; + +#ifndef WALKABLE + Element *head, *tail; +#endif + + /** The number of element in the tree. 
*/ + long treeSize; + + /** \brief Return the number of elements in the tree. */ + long length() const { return treeSize; } + + /** \brief Return the number of elements in the tree. */ + long size() const { return treeSize; } + + /* Various classes for setting the iterator */ + struct Iter; + struct IterFirst { IterFirst( const AvlTree &t ) : t(t) { } const AvlTree &t; }; + struct IterLast { IterLast( const AvlTree &t ) : t(t) { } const AvlTree &t; }; + struct IterNext { IterNext( const Iter &i ) : i(i) { } const Iter &i; }; + struct IterPrev { IterPrev( const Iter &i ) : i(i) { } const Iter &i; }; + +#ifdef WALKABLE + /** + * \brief Avl Tree Iterator. + * \ingroup iterators + */ + struct Iter + { + /* Default construct. */ + Iter() : ptr(0) { } + + /* Construct from an avl tree and iterator-setting classes. */ + Iter( const AvlTree &t ) : ptr(t.head) { } + Iter( const IterFirst &af ) : ptr(af.t.head) { } + Iter( const IterLast &al ) : ptr(al.t.tail) { } + Iter( const IterNext &an ) : ptr(findNext(an.i.ptr)) { } + Iter( const IterPrev &ap ) : ptr(findPrev(ap.i.ptr)) { } + + /* Assign from a tree and iterator-setting classes. */ + Iter &operator=( const AvlTree &tree ) { ptr = tree.head; return *this; } + Iter &operator=( const IterFirst &af ) { ptr = af.t.head; return *this; } + Iter &operator=( const IterLast &al ) { ptr = al.t.tail; return *this; } + Iter &operator=( const IterNext &an ) { ptr = findNext(an.i.ptr); return *this; } + Iter &operator=( const IterPrev &ap ) { ptr = findPrev(ap.i.ptr); return *this; } + + /** \brief Less than end? */ + bool lte() const { return ptr != 0; } + + /** \brief At end? */ + bool end() const { return ptr == 0; } + + /** \brief Greater than beginning? */ + bool gtb() const { return ptr != 0; } + + /** \brief At beginning? */ + bool beg() const { return ptr == 0; } + + /** \brief At first element? */ + bool first() const { return ptr && ptr->BASE_EL(prev) == 0; } + + /** \brief At last element? 
*/ + bool last() const { return ptr && ptr->BASE_EL(next) == 0; } + + /** \brief Implicit cast to Element*. */ + operator Element*() const { return ptr; } + + /** \brief Dereference operator returns Element&. */ + Element &operator *() const { return *ptr; } + + /** \brief Arrow operator returns Element*. */ + Element *operator->() const { return ptr; } + + /** \brief Move to next item. */ + inline Element *operator++(); + + /** \brief Move to next item. */ + inline Element *operator++(int); + + /** \brief Move to next item. */ + inline Element *increment(); + + /** \brief Move to previous item. */ + inline Element *operator--(); + + /** \brief Move to previous item. */ + inline Element *operator--(int); + + /** \brief Move to previous item. */ + inline Element *decrement(); + + /** \brief Return the next item. Does not modify this. */ + IterNext next() const { return IterNext( *this ); } + + /** \brief Return the previous item. Does not modify this. */ + IterPrev prev() const { return IterPrev( *this ); } + + private: + static Element *findPrev( Element *element ) { return element->BASE_EL(prev); } + static Element *findNext( Element *element ) { return element->BASE_EL(next); } + + public: + + /** \brief The iterator is simply a pointer. */ + Element *ptr; + }; + +#else + + /** + * \brief Avl Tree Iterator. + * \ingroup iterators + */ + struct Iter + { + /* Default construct. */ + Iter() : ptr(0), tree(0) { } + + /* Construct from a tree and iterator-setting classes. */ + Iter( const AvlTree &t ) : ptr(t.head), tree(&t) { } + Iter( const IterFirst &af ) : ptr(af.t.head), tree(&af.t) { } + Iter( const IterLast &al ) : ptr(al.t.tail), tree(&al.t) { } + Iter( const IterNext &an ) : ptr(findNext(an.i.ptr)), tree(an.i.tree) { } + Iter( const IterPrev &ap ) : ptr(findPrev(ap.i.ptr)), tree(ap.i.tree) { } + + /* Assign from a tree and iterator-setting classes. 
*/ + Iter &operator=( const AvlTree &t ) + { ptr = t.head; tree = &t; return *this; } + Iter &operator=( const IterFirst &af ) + { ptr = af.t.head; tree = &af.t; return *this; } + Iter &operator=( const IterLast &al ) + { ptr = al.t.tail; tree = &al.t; return *this; } + Iter &operator=( const IterNext &an ) + { ptr = findNext(an.i.ptr); tree = an.i.tree; return *this; } + Iter &operator=( const IterPrev &ap ) + { ptr = findPrev(ap.i.ptr); tree = ap.i.tree; return *this; } + + /** \brief Less than end? */ + bool lte() const { return ptr != 0; } + + /** \brief At end? */ + bool end() const { return ptr == 0; } + + /** \brief Greater than beginning? */ + bool gtb() const { return ptr != 0; } + + /** \brief At beginning? */ + bool beg() const { return ptr == 0; } + + /** \brief At first element? */ + bool first() const { return ptr && ptr == tree->head; } + + /** \brief At last element? */ + bool last() const { return ptr && ptr == tree->tail; } + + /** \brief Implicit cast to Element*. */ + operator Element*() const { return ptr; } + + /** \brief Dereference operator returns Element&. */ + Element &operator *() const { return *ptr; } + + /** \brief Arrow operator returns Element*. */ + Element *operator->() const { return ptr; } + + /** \brief Move to next item. */ + inline Element *operator++(); + + /** \brief Move to next item. */ + inline Element *operator++(int); + + /** \brief Move to next item. */ + inline Element *increment(); + + /** \brief Move to previous item. */ + inline Element *operator--(); + + /** \brief Move to previous item. */ + inline Element *operator--(int); + + /** \brief Move to previous item. */ + inline Element *decrement(); + + /** \brief Return the next item. Does not modify this. */ + IterNext next() const { return IterNext( *this ); } + + /** \brief Return the previous item. Does not modify this. 
*/ + IterPrev prev() const { return IterPrev( *this ); } + + private: + static Element *findPrev( Element *element ); + static Element *findNext( Element *element ); + + public: + /** \brief The iterator is simply a pointer. */ + Element *ptr; + + /* The list is not walkable so we need to keep a pointerto the tree + * so we can test against head and tail in O(1) time. */ + const AvlTree *tree; + }; +#endif + + /** \brief Return first element. */ + IterFirst first() { return IterFirst( *this ); } + + /** \brief Return last element. */ + IterLast last() { return IterLast( *this ); } + +protected: + /* Recursive worker for the copy constructor. */ + Element *copyBranch( Element *element ); + + /* Recursively delete element in the tree. */ + void deleteChildrenOf(Element *n); + + /* rebalance the tree beginning at the leaf whose + * grandparent is unbalanced. */ + Element *rebalance(Element *start); + + /* Move up the tree from a given element, recalculating the heights. */ + void recalcHeights(Element *start); + + /* Move up the tree and find the first element whose + * grand-parent is unbalanced. */ + Element *findFirstUnbalGP(Element *start); + + /* Move up the tree and find the first element which is unbalanced. */ + Element *findFirstUnbalEl(Element *start); + + /* Replace a element in the tree with another element not in the tree. */ + void replaceEl(Element *element, Element *replacement); + + /* Remove a element from the tree and put another (normally a child of element) + * in its place. */ + void removeEl(Element *element, Element *filler); + + /* Once an insertion point is found at a leaf then do the insert. */ + void attachRebal( Element *element, Element *parentEl, Element *lastLess ); +}; + +/* Copy constructor. New up each item. */ +template <AVLMEL_TEMPDEF> AvlTree<AVLMEL_TEMPUSE>:: + AvlTree(const AvlTree<AVLMEL_TEMPUSE> &other) +#ifdef WALKABLE +: + /* Make an empty list, copyBranch will fill in the details for us. 
*/ + BASELIST() +#endif +{ + treeSize = other.treeSize; + root = other.root; + +#ifndef WALKABLE + head = 0; + tail = 0; +#endif + + /* If there is a root, copy the tree. */ + if ( other.root != 0 ) + root = copyBranch( other.root ); +} + +#if defined( AVLTREE_MAP ) || defined( AVLTREE_SET ) + +/* Assignment does deep copy. */ +template <AVLMEL_TEMPDEF> AvlTree<AVLMEL_TEMPUSE> &AvlTree<AVLMEL_TEMPUSE>:: + operator=( const AvlTree &other ) +{ + /* Clear the tree first. */ + empty(); + + /* Reset the list pointers, the tree copy will fill in the list for us. */ +#ifdef WALKABLE + BASELIST::abandon(); +#else + head = 0; + tail = 0; +#endif + + /* Copy the entire tree. */ + treeSize = other.treeSize; + root = other.root; + if ( other.root != 0 ) + root = copyBranch( other.root ); + return *this; +} + +template <AVLMEL_TEMPDEF> void AvlTree<AVLMEL_TEMPUSE>:: + transfer(AvlTree<AVLMEL_TEMPUSE> &other) +{ + /* Clear the tree first. */ + empty(); + + treeSize = other.treeSize; + root = other.root; + +#ifdef WALKABLE + BASELIST::shallowCopy( other ); +#else + head = other.head; + tail = other.tail; +#endif + + other.abandon(); +} + +#else /* ! AVLTREE_MAP && ! AVLTREE_SET */ + +/* Assignment does deep copy. This version does not clear the tree first. */ +template <AVLMEL_TEMPDEF> AvlTree<AVLMEL_TEMPUSE> &AvlTree<AVLMEL_TEMPUSE>:: + operator=( const AvlTree &other ) +{ + /* Reset the list pointers, the tree copy will fill in the list for us. */ +#ifdef WALKABLE + BASELIST::abandon(); +#else + head = 0; + tail = 0; +#endif + + /* Copy the entire tree. 
*/ + treeSize = other.treeSize; + root = other.root; + if ( other.root != 0 ) + root = copyBranch( other.root ); + return *this; +} + +template <AVLMEL_TEMPDEF> void AvlTree<AVLMEL_TEMPUSE>:: + transfer(AvlTree<AVLMEL_TEMPUSE> &other) +{ + treeSize = other.treeSize; + root = other.root; + +#ifdef WALKABLE + BASELIST::shallowCopy( other ); +#else + head = other.head; + tail = other.tail; +#endif + + other.abandon(); +} + +#endif + +/* + * Iterator operators. + */ + +/* Prefix ++ */ +template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::Iter:: + operator++() +{ + return ptr = findNext( ptr ); +} + +/* Postfix ++ */ +template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::Iter:: + operator++(int) +{ + Element *rtn = ptr; + ptr = findNext( ptr ); + return rtn; +} + +/* increment */ +template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::Iter:: + increment() +{ + return ptr = findNext( ptr ); +} + +/* Prefix -- */ +template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::Iter:: + operator--() +{ + return ptr = findPrev( ptr ); +} + +/* Postfix -- */ +template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::Iter:: + operator--(int) +{ + Element *rtn = ptr; + ptr = findPrev( ptr ); + return rtn; +} + +/* decrement */ +template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::Iter:: + decrement() +{ + return ptr = findPrev( ptr ); +} + +#ifndef WALKABLE + +/* Move ahead one. */ +template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::Iter:: + findNext( Element *element ) +{ + /* Try to go right once then infinite left. */ + if ( element->BASE_EL(right) != 0 ) { + element = element->BASE_EL(right); + while ( element->BASE_EL(left) != 0 ) + element = element->BASE_EL(left); + } + else { + /* Go up to parent until we were just a left child. */ + while ( true ) { + Element *last = element; + element = element->BASE_EL(parent); + if ( element == 0 || element->BASE_EL(left) == last ) + break; + } + } + return element; +} + +/* Move back one. 
*/
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::Iter::
+        findPrev( Element *element )
+{
+    /* Try to go left once then infinite right. That lands on the rightmost
+     * element of the left subtree. */
+    if ( element->BASE_EL(left) != 0 ) {
+        element = element->BASE_EL(left);
+        while ( element->BASE_EL(right) != 0 )
+            element = element->BASE_EL(right);
+    }
+    else {
+        /* Go up to parent until we were just a right child. If we run out
+         * of parents the result is null. */
+        while ( true ) {
+            Element *last = element;
+            element = element->BASE_EL(parent);
+            if ( element == 0 || element->BASE_EL(right) == last )
+                break;
+        }
+    }
+    return element;
+}
+
+#endif
+
+
+/* Recursive worker for tree copying. Duplicates the subtree rooted at
+ * element and returns the root of the copy. The left subtree is copied
+ * first, then the new element is appended to the list (or recorded as
+ * head/tail when not WALKABLE), then the right subtree is copied, so
+ * elements are appended in left-to-right order. */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::
+        copyBranch( Element *element )
+{
+    /* Duplicate element. Either the base element's copy constructor or default
+     * constructor will get called. Both will suffice for initting the
+     * pointers to null when they need to be. */
+    Element *retVal = new Element(*element);
+
+    /* If the left tree is there, copy it. */
+    if ( retVal->BASE_EL(left) ) {
+        retVal->BASE_EL(left) = copyBranch(retVal->BASE_EL(left));
+        retVal->BASE_EL(left)->BASE_EL(parent) = retVal;
+    }
+
+#ifdef WALKABLE
+    BASELIST::addAfter( BASELIST::tail, retVal );
+#else
+    if ( head == 0 )
+        head = retVal;
+    tail = retVal;
+#endif
+
+    /* If the right tree is there, copy it. */
+    if ( retVal->BASE_EL(right) ) {
+        retVal->BASE_EL(right) = copyBranch(retVal->BASE_EL(right));
+        retVal->BASE_EL(right)->BASE_EL(parent) = retVal;
+    }
+    return retVal;
+}
+
+/* Once an insertion position is found, attach a element to the tree.
+ * parentEl is the element to attach under (null when the tree is empty and
+ * element becomes the root). lastLess equals parentEl when the last step of
+ * the search went left. After attaching, heights are recalculated and at
+ * most one rebalance is performed. */
+template <AVLMEL_TEMPDEF> void AvlTree<AVLMEL_TEMPUSE>::
+        attachRebal( Element *element, Element *parentEl, Element *lastLess )
+{
+    /* Increment the number of element in the tree. */
+    treeSize += 1;
+
+    /* Set element's parent. */
+    element->BASE_EL(parent) = parentEl;
+
+    /* New element always starts as a leaf with height 1. */
+    element->BASE_EL(left) = 0;
+    element->BASE_EL(right) = 0;
+    element->BASE_EL(height) = 1;
+
+    /* Are we inserting in the tree somewhere? */
+    if ( parentEl != 0 ) {
+        /* We have a parent so we are somewhere in the tree. If the parent
+         * equals lastLess, then the last traversal in the insertion went
+         * left, otherwise it went right. */
+        if ( lastLess == parentEl ) {
+            parentEl->BASE_EL(left) = element;
+#ifdef WALKABLE
+            BASELIST::addBefore( parentEl, element );
+#endif
+        }
+        else {
+            parentEl->BASE_EL(right) = element;
+#ifdef WALKABLE
+            BASELIST::addAfter( parentEl, element );
+#endif
+        }
+
+#ifndef WALKABLE
+        /* Maintain the first pointer: the new element became the left child
+         * of the old head, so it is now the first element. */
+        if ( head->BASE_EL(left) == element )
+            head = element;
+
+        /* Maintain the last pointer: the new element became the right child
+         * of the old tail, so it is now the last element. */
+        if ( tail->BASE_EL(right) == element )
+            tail = element;
+#endif
+    }
+    else {
+        /* No parent element so we are inserting the root. */
+        root = element;
+#ifdef WALKABLE
+        BASELIST::addAfter( BASELIST::tail, element );
+#else
+        head = tail = element;
+#endif
+    }
+
+
+    /* Recalculate the heights. */
+    recalcHeights(parentEl);
+
+    /* Find the first unbalance. */
+    Element *ub = findFirstUnbalGP(element);
+
+    /* rebalance. */
+    if ( ub != 0 )
+    {
+        /* We assert that after this single rotation the
+         * tree is now properly balanced. */
+        rebalance(ub);
+    }
+}
+
+#ifndef AVL_KEYLESS
+
+/**
+ * \brief Insert an existing element into the tree.
+ *
+ * If the insert succeeds and lastFound is given then it is set to the element
+ * inserted. If the insert fails then lastFound is set to the existing element in
+ * the tree that has the same key as element. If the element's avl pointers are
+ * already in use then undefined behaviour results.
+ *
+ * \returns The element inserted upon success, null upon failure. 
+ */ +template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>:: + insert( Element *element, Element **lastFound ) +{ + long keyRelation; + Element *curEl = root, *parentEl = 0; + Element *lastLess = 0; + + while (true) { + if ( curEl == 0 ) { + /* We are at an external element and did not find the key we were + * looking for. Attach underneath the leaf and rebalance. */ + attachRebal( element, parentEl, lastLess ); + + if ( lastFound != 0 ) + *lastFound = element; + return element; + } + +#ifdef AVL_BASIC + keyRelation = compare( *element, *curEl ); +#else + keyRelation = compare( element->BASEKEY(getKey()), + curEl->BASEKEY(getKey()) ); +#endif + + /* Do we go left? */ + if ( keyRelation < 0 ) { + parentEl = lastLess = curEl; + curEl = curEl->BASE_EL(left); + } + /* Do we go right? */ + else if ( keyRelation > 0 ) { + parentEl = curEl; + curEl = curEl->BASE_EL(right); + } + /* We have hit the target. */ + else { + if ( lastFound != 0 ) + *lastFound = curEl; + return 0; + } + } +} + +#ifdef AVL_BASIC + +/** + * \brief Find a element in the tree with the given key. + * + * \returns The element if key exists, null if the key does not exist. + */ +template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>:: + find( const Element *element ) const +{ + Element *curEl = root; + long keyRelation; + + while (curEl) { + keyRelation = compare( *element, *curEl ); + + /* Do we go left? */ + if ( keyRelation < 0 ) + curEl = curEl->BASE_EL(left); + /* Do we go right? */ + else if ( keyRelation > 0 ) + curEl = curEl->BASE_EL(right); + /* We have hit the target. */ + else { + return curEl; + } + } + return 0; +} + +#else + +/** + * \brief Insert a new element into the tree with given key. + * + * If the key is not already in the tree then a new element is made using the + * Element(const Key &key) constructor and the insert succeeds. If lastFound is + * given then it is set to the element inserted. 
If the insert fails then + * lastFound is set to the existing element in the tree that has the same key as + * element. + * + * \returns The new element upon success, null upon failure. + */ +template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>:: + insert( const Key &key, Element **lastFound ) +{ + long keyRelation; + Element *curEl = root, *parentEl = 0; + Element *lastLess = 0; + + while (true) { + if ( curEl == 0 ) { + /* We are at an external element and did not find the key we were + * looking for. Create the new element, attach it underneath the leaf + * and rebalance. */ + Element *element = new Element( key ); + attachRebal( element, parentEl, lastLess ); + + if ( lastFound != 0 ) + *lastFound = element; + return element; + } + + keyRelation = compare( key, curEl->BASEKEY(getKey()) ); + + /* Do we go left? */ + if ( keyRelation < 0 ) { + parentEl = lastLess = curEl; + curEl = curEl->BASE_EL(left); + } + /* Do we go right? */ + else if ( keyRelation > 0 ) { + parentEl = curEl; + curEl = curEl->BASE_EL(right); + } + /* We have hit the target. */ + else { + if ( lastFound != 0 ) + *lastFound = curEl; + return 0; + } + } +} + +#ifdef AVLTREE_MAP +/** + * \brief Insert a new element into the tree with key and value. + * + * If the key is not already in the tree then a new element is constructed and + * the insert succeeds. If lastFound is given then it is set to the element + * inserted. If the insert fails then lastFound is set to the existing element in + * the tree that has the same key as element. This insert routine is only + * available in AvlMap because it is the only class that knows about a Value + * type. + * + * \returns The new element upon success, null upon failure. 
+ */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::
+        insert( const Key &key, const Value &val, Element **lastFound )
+{
+    long keyRelation;
+    Element *curEl = root, *parentEl = 0;
+    Element *lastLess = 0;
+
+    while (true) {
+        if ( curEl == 0 ) {
+            /* We are at an external element and did not find the key we were
+             * looking for. Create the new element, attach it underneath the leaf
+             * and rebalance. */
+            Element *element = new Element( key, val );
+            attachRebal( element, parentEl, lastLess );
+
+            if ( lastFound != 0 )
+                *lastFound = element;
+            return element;
+        }
+
+        /* Fetch the key through BASEKEY() like every other key-based
+         * routine in this file does. */
+        keyRelation = compare( key, curEl->BASEKEY(getKey()) );
+
+        /* Do we go left? */
+        if ( keyRelation < 0 ) {
+            parentEl = lastLess = curEl;
+            curEl = curEl->BASE_EL(left);
+        }
+        /* Do we go right? */
+        else if ( keyRelation > 0 ) {
+            parentEl = curEl;
+            curEl = curEl->BASE_EL(right);
+        }
+        /* We have hit the target. The key is taken, so nothing is inserted. */
+        else {
+            if ( lastFound != 0 )
+                *lastFound = curEl;
+            return 0;
+        }
+    }
+}
+#endif /* AVLTREE_MAP */
+
+
+/**
+ * \brief Find a element in the tree with the given key.
+ *
+ * \returns The element if key exists, null if the key does not exist.
+ */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::
+        find( const Key &key ) const
+{
+    Element *curEl = root;
+    long keyRelation;
+
+    while (curEl) {
+        keyRelation = compare( key, curEl->BASEKEY(getKey()) );
+
+        /* Do we go left? */
+        if ( keyRelation < 0 )
+            curEl = curEl->BASE_EL(left);
+        /* Do we go right? */
+        else if ( keyRelation > 0 )
+            curEl = curEl->BASE_EL(right);
+        /* We have hit the target. */
+        else {
+            return curEl;
+        }
+    }
+    return 0;
+}
+
+
+/**
+ * \brief Find a element, then detach it from the tree.
+ *
+ * The element is not deleted.
+ *
+ * \returns The element detached if the key is found, otherwise returns null. 
+ */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::
+        detach(const Key &key)
+{
+    Element *element = find( key );
+    if ( element ) {
+        detach(element);
+    }
+
+    return element;
+}
+
+/**
+ * \brief Find, detach and delete a element from the tree.
+ *
+ * \returns True if the element was found and deleted, false otherwise.
+ */
+template <AVLMEL_TEMPDEF> bool AvlTree<AVLMEL_TEMPUSE>::
+        remove(const Key &key)
+{
+    /* Assume not found. */
+    bool retVal = false;
+
+    /* Look for the key. */
+    Element *element = find( key );
+    if ( element != 0 ) {
+        /* If found, detach the element and delete. */
+        detach( element );
+        delete element;
+        retVal = true;
+    }
+
+    return retVal;
+}
+
+#endif /* AVL_BASIC */
+#endif /* AVL_KEYLESS */
+
+
+/**
+ * \brief Detach and delete a element from the tree.
+ *
+ * If the element is not in the tree then undefined behaviour results.
+ */
+template <AVLMEL_TEMPDEF> void AvlTree<AVLMEL_TEMPUSE>::
+        remove(Element *element)
+{
+    /* Detach and delete. */
+    detach(element);
+    delete element;
+}
+
+/**
+ * \brief Detach a element from the tree.
+ *
+ * The element is unlinked and the tree is rebalanced; the element itself is
+ * not deleted. If the element is not in the tree then undefined behaviour
+ * results.
+ *
+ * \returns The element given.
+ */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::
+        detach(Element *element)
+{
+    Element *replacement, *fixfrom;
+    long lheight, rheight;
+
+#ifdef WALKABLE
+    /* Remove the element from the ordered list. */
+    BASELIST::detach( element );
+#endif
+
+    /* Update treeSize. */
+    treeSize--;
+
+    /* Find a replacement element. */
+    if (element->BASE_EL(right))
+    {
+        /* Find the leftmost element of the right subtree. */
+        replacement = element->BASE_EL(right);
+        while (replacement->BASE_EL(left))
+            replacement = replacement->BASE_EL(left);
+
+        /* If replacing the element with its child then we need to start
+         * fixing at the replacement, otherwise we start fixing at the
+         * parent of the replacement. */
+        if (replacement->BASE_EL(parent) == element)
+            fixfrom = replacement;
+        else
+            fixfrom = replacement->BASE_EL(parent);
+
+#ifndef WALKABLE
+        if ( element == head )
+            head = replacement;
+#endif
+
+        removeEl(replacement, replacement->BASE_EL(right));
+        replaceEl(element, replacement);
+    }
+    else if (element->BASE_EL(left))
+    {
+        /* Find the rightmost element of the left subtree. */
+        replacement = element->BASE_EL(left);
+        while (replacement->BASE_EL(right))
+            replacement = replacement->BASE_EL(right);
+
+        /* If replacing the element with its child then we need to start
+         * fixing at the replacement, otherwise we start fixing at the
+         * parent of the replacement. */
+        if (replacement->BASE_EL(parent) == element)
+            fixfrom = replacement;
+        else
+            fixfrom = replacement->BASE_EL(parent);
+
+#ifndef WALKABLE
+        if ( element == tail )
+            tail = replacement;
+#endif
+
+        removeEl(replacement, replacement->BASE_EL(left));
+        replaceEl(element, replacement);
+    }
+    else
+    {
+        /* We need to start fixing at the parent of the element. */
+        fixfrom = element->BASE_EL(parent);
+
+#ifndef WALKABLE
+        if ( element == head )
+            head = element->BASE_EL(parent);
+        if ( element == tail )
+            tail = element->BASE_EL(parent);
+#endif
+
+        /* The element we are deleting is a leaf element. */
+        removeEl(element, 0);
+    }
+
+    /* If fixfrom is null it means we just deleted
+     * the root of the tree. */
+    if ( fixfrom == 0 )
+        return element;
+
+    /* Fix the heights after the deletion. */
+    recalcHeights(fixfrom);
+
+    /* Fix every unbalanced element going up in the tree. */
+    Element *ub = findFirstUnbalEl(fixfrom);
+    while ( ub )
+    {
+        /* Find the element to rebalance by moving down from the first unbalanced
+         * element 2 levels in the direction of the greatest heights. On the
+         * second move down, the heights may be equal ( but not on the first ).
+         * In which case go in the direction of the first move. */
+        lheight = ub->BASE_EL(left) ? 
ub->BASE_EL(left)->BASE_EL(height) : 0; + rheight = ub->BASE_EL(right) ? ub->BASE_EL(right)->BASE_EL(height) : 0; + assert( lheight != rheight ); + if (rheight > lheight) + { + ub = ub->BASE_EL(right); + lheight = ub->BASE_EL(left) ? + ub->BASE_EL(left)->BASE_EL(height) : 0; + rheight = ub->BASE_EL(right) ? + ub->BASE_EL(right)->BASE_EL(height) : 0; + if (rheight > lheight) + ub = ub->BASE_EL(right); + else if (rheight < lheight) + ub = ub->BASE_EL(left); + else + ub = ub->BASE_EL(right); + } + else + { + ub = ub->BASE_EL(left); + lheight = ub->BASE_EL(left) ? + ub->BASE_EL(left)->BASE_EL(height) : 0; + rheight = ub->BASE_EL(right) ? + ub->BASE_EL(right)->BASE_EL(height) : 0; + if (rheight > lheight) + ub = ub->BASE_EL(right); + else if (rheight < lheight) + ub = ub->BASE_EL(left); + else + ub = ub->BASE_EL(left); + } + + + /* rebalance returns the grandparant of the subtree formed + * by the element that were rebalanced. + * We must continue upward from there rebalancing. */ + fixfrom = rebalance(ub); + + /* Find the next unbalaced element. */ + ub = findFirstUnbalEl(fixfrom); + } + + return element; +} + + +/** + * \brief Empty the tree and delete all the element. + * + * Resets the tree to its initial state. + */ +template <AVLMEL_TEMPDEF> void AvlTree<AVLMEL_TEMPUSE>::empty() +{ + if ( root ) { + /* Recursively delete from the tree structure. */ + deleteChildrenOf(root); + delete root; + root = 0; + treeSize = 0; + +#ifdef WALKABLE + BASELIST::abandon(); +#endif + } +} + +/** + * \brief Forget all element in the tree. + * + * Does not delete element. Resets the the tree to it's initial state. + */ +template <AVLMEL_TEMPDEF> void AvlTree<AVLMEL_TEMPUSE>::abandon() +{ + root = 0; + treeSize = 0; + +#ifdef WALKABLE + BASELIST::abandon(); +#endif +} + +/* Recursively delete all the children of a element. */ +template <AVLMEL_TEMPDEF> void AvlTree<AVLMEL_TEMPUSE>:: + deleteChildrenOf( Element *element ) +{ + /* Recurse left. 
*/ + if (element->BASE_EL(left)) { + deleteChildrenOf(element->BASE_EL(left)); + + /* Delete left element. */ + delete element->BASE_EL(left); + element->BASE_EL(left) = 0; + } + + /* Recurse right. */ + if (element->BASE_EL(right)) { + deleteChildrenOf(element->BASE_EL(right)); + + /* Delete right element. */ + delete element->BASE_EL(right); + element->BASE_EL(left) = 0; + } +} + +/* rebalance from a element whose gradparent is unbalanced. Only + * call on a element that has a grandparent. */ +template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>:: + rebalance(Element *n) +{ + long lheight, rheight; + Element *a, *b, *c; + Element *t1, *t2, *t3, *t4; + + Element *p = n->BASE_EL(parent); /* parent (Non-NUL). L*/ + Element *gp = p->BASE_EL(parent); /* Grand-parent (Non-NULL). */ + Element *ggp = gp->BASE_EL(parent); /* Great grand-parent (may be NULL). */ + + if (gp->BASE_EL(right) == p) + { + /* gp + * \ + * p + */ + if (p->BASE_EL(right) == n) + { + /* gp + * \ + * p + * \ + * n + */ + a = gp; + b = p; + c = n; + t1 = gp->BASE_EL(left); + t2 = p->BASE_EL(left); + t3 = n->BASE_EL(left); + t4 = n->BASE_EL(right); + } + else + { + /* gp + * \ + * p + * / + * n + */ + a = gp; + b = n; + c = p; + t1 = gp->BASE_EL(left); + t2 = n->BASE_EL(left); + t3 = n->BASE_EL(right); + t4 = p->BASE_EL(right); + } + } + else + { + /* gp + * / + * p + */ + if (p->BASE_EL(right) == n) + { + /* gp + * / + * p + * \ + * n + */ + a = p; + b = n; + c = gp; + t1 = p->BASE_EL(left); + t2 = n->BASE_EL(left); + t3 = n->BASE_EL(right); + t4 = gp->BASE_EL(right); + } + else + { + /* gp + * / + * p + * / + * n + */ + a = n; + b = p; + c = gp; + t1 = n->BASE_EL(left); + t2 = n->BASE_EL(right); + t3 = p->BASE_EL(right); + t4 = gp->BASE_EL(right); + } + } + + /* Perform rotation. + */ + + /* Tie b to the great grandparent. 
*/
+    if ( ggp == 0 )
+        root = b;
+    else if ( ggp->BASE_EL(left) == gp )
+        ggp->BASE_EL(left) = b;
+    else
+        ggp->BASE_EL(right) = b;
+    b->BASE_EL(parent) = ggp;
+
+    /* Tie a as a leftchild of b. */
+    b->BASE_EL(left) = a;
+    a->BASE_EL(parent) = b;
+
+    /* Tie c as a rightchild of b. */
+    b->BASE_EL(right) = c;
+    c->BASE_EL(parent) = b;
+
+    /* Tie t1 as a leftchild of a. */
+    a->BASE_EL(left) = t1;
+    if ( t1 != 0 ) t1->BASE_EL(parent) = a;
+
+    /* Tie t2 as a rightchild of a. */
+    a->BASE_EL(right) = t2;
+    if ( t2 != 0 ) t2->BASE_EL(parent) = a;
+
+    /* Tie t3 as a leftchild of c. */
+    c->BASE_EL(left) = t3;
+    if ( t3 != 0 ) t3->BASE_EL(parent) = c;
+
+    /* Tie t4 as a rightchild of c. */
+    c->BASE_EL(right) = t4;
+    if ( t4 != 0 ) t4->BASE_EL(parent) = c;
+
+    /* The heights are all recalculated manually and the great
+     * grand-parent is passed to recalcHeights() to ensure
+     * the heights are correct up the tree.
+     *
+     * Note that recalcHeights() cuts out when it comes across
+     * a height that hasn't changed.
+     */
+
+    /* Fix height of a. */
+    lheight = a->BASE_EL(left) ? a->BASE_EL(left)->BASE_EL(height) : 0;
+    rheight = a->BASE_EL(right) ? a->BASE_EL(right)->BASE_EL(height) : 0;
+    a->BASE_EL(height) = (lheight > rheight ? lheight : rheight) + 1;
+
+    /* Fix height of c. */
+    lheight = c->BASE_EL(left) ? c->BASE_EL(left)->BASE_EL(height) : 0;
+    rheight = c->BASE_EL(right) ? c->BASE_EL(right)->BASE_EL(height) : 0;
+    c->BASE_EL(height) = (lheight > rheight ? lheight : rheight) + 1;
+
+    /* Fix height of b. Its children are now a and c. */
+    lheight = a->BASE_EL(height);
+    rheight = c->BASE_EL(height);
+    b->BASE_EL(height) = (lheight > rheight ? lheight : rheight) + 1;
+
+    /* Fix height of b's parents. The great grand-parent (possibly null) is
+     * also what gets returned to the caller. */
+    recalcHeights(ggp);
+    return ggp;
+}
+
+/* Recalculates the height of element and then of each of its ancestors in
+ * turn, stopping at the first one whose height does not change. A null
+ * element is accepted and does nothing. */
+template <AVLMEL_TEMPDEF> void AvlTree<AVLMEL_TEMPUSE>::
+        recalcHeights(Element *element)
+{
+    long lheight, rheight, new_height;
+    while ( element != 0 )
+    {
+        lheight = element->BASE_EL(left) ? element->BASE_EL(left)->BASE_EL(height) : 0;
+        rheight = element->BASE_EL(right) ? element->BASE_EL(right)->BASE_EL(height) : 0;
+
+        new_height = (lheight > rheight ? lheight : rheight) + 1;
+
+        /* If there is no change in the height, then there will be no
+         * change in any of the ancestor's height. We can stop going up.
+         * If there was a change, continue upward. */
+        if (new_height == element->BASE_EL(height))
+            return;
+        else
+            element->BASE_EL(height) = new_height;
+
+        element = element->BASE_EL(parent);
+    }
+}
+
+/* Finds the first element whose grandparent is unbalanced. Walks upward
+ * from element, checking the grandparent at each step, and returns the
+ * element whose grandparent's subtree heights differ by more than one.
+ * Returns null if no such element is found. */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::
+        findFirstUnbalGP(Element *element)
+{
+    long lheight, rheight, balanceProp;
+    Element *gp;
+
+    if ( element == 0 || element->BASE_EL(parent) == 0 ||
+            element->BASE_EL(parent)->BASE_EL(parent) == 0 )
+        return 0;
+
+    /* The check above already returned if we have no grandparent, so
+     * there is one to start from here. */
+    gp = element->BASE_EL(parent)->BASE_EL(parent);
+    while ( gp != 0 )
+    {
+        lheight = gp->BASE_EL(left) ? gp->BASE_EL(left)->BASE_EL(height) : 0;
+        rheight = gp->BASE_EL(right) ? gp->BASE_EL(right)->BASE_EL(height) : 0;
+        balanceProp = lheight - rheight;
+
+        if ( balanceProp < -1 || balanceProp > 1 )
+            return element;
+
+        element = element->BASE_EL(parent);
+        gp = gp->BASE_EL(parent);
+    }
+    return 0;
+}
+
+
+/* Finds the first element that is unbalanced. Walks upward starting at
+ * element itself and returns the first one whose subtree heights differ by
+ * more than one, or null if there is none (or element is null). */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::
+        findFirstUnbalEl(Element *element)
+{
+    if ( element == 0 )
+        return 0;
+
+    while ( element != 0 )
+    {
+        long lheight = element->BASE_EL(left) ?
+                element->BASE_EL(left)->BASE_EL(height) : 0;
+        long rheight = element->BASE_EL(right) ?
+                element->BASE_EL(right)->BASE_EL(height) : 0;
+        long balanceProp = lheight - rheight;
+
+        if ( balanceProp < -1 || balanceProp > 1 )
+            return element;
+
+        element = element->BASE_EL(parent);
+    }
+    return 0;
+}
+
+/* Replace a element in the tree with another element not in the tree. 
*/
+template <AVLMEL_TEMPDEF> void AvlTree<AVLMEL_TEMPUSE>::
+        replaceEl(Element *element, Element *replacement)
+{
+    Element *parent = element->BASE_EL(parent),
+        *left = element->BASE_EL(left),
+        *right = element->BASE_EL(right);
+
+    replacement->BASE_EL(left) = left;
+    if (left)
+        left->BASE_EL(parent) = replacement;
+    replacement->BASE_EL(right) = right;
+    if (right)
+        right->BASE_EL(parent) = replacement;
+
+    replacement->BASE_EL(parent) = parent;
+    if (parent)
+    {
+        if (parent->BASE_EL(left) == element)
+            parent->BASE_EL(left) = replacement;
+        else
+            parent->BASE_EL(right) = replacement;
+    }
+    else
+        root = replacement;
+
+    replacement->BASE_EL(height) = element->BASE_EL(height);
+}
+
+/* Removes a element from a tree and puts filler in its place.
+ * Filler should be null or a child of element. The parent's child pointer
+ * (or root, when element has no parent) is redirected to filler; the
+ * pointers stored in element itself are left untouched. */
+template <AVLMEL_TEMPDEF> void AvlTree<AVLMEL_TEMPUSE>::
+        removeEl(Element *element, Element *filler)
+{
+    Element *parent = element->BASE_EL(parent);
+
+    if (parent)
+    {
+        if (parent->BASE_EL(left) == element)
+            parent->BASE_EL(left) = filler;
+        else
+            parent->BASE_EL(right) = filler;
+    }
+    else
+        root = filler;
+
+    if (filler)
+        filler->BASE_EL(parent) = parent;
+
+    return;
+}
+
+#ifdef AAPL_NAMESPACE
+}
+#endif
diff --git a/aapl/avlibasic.h b/aapl/avlibasic.h
new file mode 100644
index 0000000..a48faaa
--- /dev/null
+++ b/aapl/avlibasic.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLIBASIC_H +#define _AAPL_AVLIBASIC_H + +#include "compare.h" + +/** + * \addtogroup avlitree + * @{ + */ + +/** + * \class AvliBasic + * \brief Linked AVL Tree in which the entire element structure is the key. + * + * AvliBasic is a linked AVL tree that does not distinguish between the + * element that it contains and the key. The entire element structure is the + * key that is used to compare the relative ordering of elements. This is + * similar to the BstSet structure. + * + * AvliBasic does not assume ownership of elements in the tree. Items must be + * explicitly de-allocated. + */ + +/*@}*/ + +#define BASE_EL(name) name +#define BASEKEY(name) name +#define AVLMEL_CLASSDEF class Element, class Compare +#define AVLMEL_TEMPDEF class Element, class Compare +#define AVLMEL_TEMPUSE Element, Compare +#define AvlTree AvliBasic +#define AVL_BASIC +#define WALKABLE + +#include "avlcommon.h" + +#undef BASE_EL +#undef BASEKEY +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef AVL_BASIC +#undef WALKABLE + +#endif /* _AAPL_AVLIBASIC_H */ diff --git a/aapl/avlikeyless.h b/aapl/avlikeyless.h new file mode 100644 index 0000000..559b75a --- /dev/null +++ b/aapl/avlikeyless.h @@ -0,0 +1,64 @@ +/* + * Copyright 2002, 2003 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. 
+ * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLIKEYLESS_H +#define _AAPL_AVLIKEYLESS_H + +#include "compare.h" +#include "dlistmel.h" + +/** + * \addtogroup avlitree + * @{ + */ + +/** + * \class AvliKeyless + * \brief Linked AVL tree that has no insert/find/remove functions that take a + * key. + * + * AvliKeyless is an implementation of the AVL tree rebalancing functionality + * only. It provides the common code for the tiny AVL tree implementations. + */ + +/*@}*/ + +#define BASE_EL(name) name +#define BASELIST DListMel< Element, AvliTreeEl<Element> > +#define AVLMEL_CLASSDEF class Element +#define AVLMEL_TEMPDEF class Element +#define AVLMEL_TEMPUSE Element +#define AvlTree AvliKeyless +#define WALKABLE +#define AVL_KEYLESS + +#include "avlcommon.h" + +#undef BASE_EL +#undef BASELIST +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef WALKABLE +#undef AVL_KEYLESS + +#endif /* _AAPL_AVLIKEYLESS_H */ diff --git a/aapl/avlimap.h b/aapl/avlimap.h new file mode 100644 index 0000000..38bfff7 --- /dev/null +++ b/aapl/avlimap.h @@ -0,0 +1,77 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. 
+ * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLIMAP_H +#define _AAPL_AVLIMAP_H + +#include "compare.h" +#include "dlist.h" + +/** + * \addtogroup avlitree + * @{ + */ + +/** + * \class AvliMap + * \brief Linked key and value oriented AVL tree. + * + * AvliMap stores key and value pairs in elements that are managed by the tree. It + * is intended to be similar to the map template found in the STL. AvliMap + * requires that a Key type, a Value type, and a class containing a compare() + * routine for Key be given. Items can be inserted with just a key or with a + * key and value pair. + * + * AvliMap assumes all elements in the tree are allocated on the heap and are + * to be managed by the tree. This means that the class destructor will delete + * the contents of the tree. A deep copy will cause existing elements to be + * deleted first. 
+ * + * \include ex_avlimap.cpp + */ + +/*@}*/ + +#define AVLTREE_MAP +#define BASE_EL(name) name +#define BASEKEY(name) name +#define BASELIST DList< AvliMapEl<Key,Value> > +#define AVLMEL_CLASSDEF class Key, class Value, class Compare = CmpOrd<Key> +#define AVLMEL_TEMPDEF class Key, class Value, class Compare +#define AVLMEL_TEMPUSE Key, Value, Compare +#define AvlTree AvliMap +#define Element AvliMapEl<Key,Value> +#define WALKABLE + +#include "avlcommon.h" + +#undef AVLTREE_MAP +#undef BASE_EL +#undef BASEKEY +#undef BASELIST +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef Element +#undef WALKABLE + +#endif /* _AAPL_AVLIMAP_H */ diff --git a/aapl/avlimel.h b/aapl/avlimel.h new file mode 100644 index 0000000..9442a99 --- /dev/null +++ b/aapl/avlimel.h @@ -0,0 +1,79 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLIMEL_H +#define _AAPL_AVLIMEL_H + +#include "compare.h" +#include "dlistmel.h" + +/** + * \addtogroup avlitree + * @{ + */ + +/** + * \class AvliMel + * \brief Linked AVL tree for element appearing in multiple trees. 
+ * + * AvliMel allows for an element to simultaneously be in multiple trees without + * the trees interfering with one another. For each tree that the element is + * to appear in, there must be a distinct set of AVL Tree management data that + * can be unambiguously referenced with some base class name. This name + * is passed to the tree as a template parameter and is used in the tree + * algorithms. + * + * The element must use the same key type and value in each tree that it + * appears in. If distinct keys are required, the AvliMelKey structure is + * available. + * + * AvliMel does not assume ownership of elements in the tree. The destructor + * will not delete the elements. If the user wishes to explicitly deallocate + * all the items in the tree the empty() routine is available. + * + * \include ex_avlimel.cpp + */ + +/*@}*/ + +#define BASE_EL(name) BaseEl::name +#define BASEKEY(name) name +#define BASELIST DListMel< Element, BaseEl > +#define AVLMEL_CLASSDEF class Element, class Key, \ + class BaseEl, class Compare = CmpOrd<Key> +#define AVLMEL_TEMPDEF class Element, class Key, \ + class BaseEl, class Compare +#define AVLMEL_TEMPUSE Element, Key, BaseEl, Compare +#define AvlTree AvliMel +#define WALKABLE + +#include "avlcommon.h" + +#undef BASE_EL +#undef BASEKEY +#undef BASELIST +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef WALKABLE + +#endif /* _AAPL_AVLIMEL_H */ diff --git a/aapl/avlimelkey.h b/aapl/avlimelkey.h new file mode 100644 index 0000000..faa56e8 --- /dev/null +++ b/aapl/avlimelkey.h @@ -0,0 +1,76 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. 
+ * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLIMELKEY_H +#define _AAPL_AVLIMELKEY_H + +#include "compare.h" +#include "dlistmel.h" + +/** + * \addtogroup avlitree + * @{ + */ + +/** + * \class AvliMelKey + * \brief Linked AVL tree for element appearing in multiple trees with different keys. + * + * AvliMelKey is similar to AvliMel, except that an additional template + * parameter, BaseKey, is provided for resolving ambiguous references to + * getKey(). This means that if an element is stored in multiple trees, each + * tree can use a different key for ordering the elements in it. Using + * AvliMelKey an array of data structures can be indexed with an O(log(n)) + * search on two or more of the values contained within it and without + * allocating any additional data. + * + * AvliMelKey does not assume ownership of elements in the tree. The destructor + * will not delete the elements. If the user wishes to explicitly deallocate + * all the items in the tree the empty() routine is available. 
+ * + * \include ex_avlimelkey.cpp + */ + +/*@}*/ + +#define BASE_EL(name) BaseEl::name +#define BASEKEY(name) BaseKey::name +#define BASELIST DListMel< Element, BaseEl > +#define AVLMEL_CLASSDEF class Element, class Key, class BaseEl, \ + class BaseKey, class Compare = CmpOrd<Key> +#define AVLMEL_TEMPDEF class Element, class Key, class BaseEl, \ + class BaseKey, class Compare +#define AVLMEL_TEMPUSE Element, Key, BaseEl, BaseKey, Compare +#define AvlTree AvliMelKey +#define WALKABLE + +#include "avlcommon.h" + +#undef BASE_EL +#undef BASEKEY +#undef BASELIST +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef WALKABLE + +#endif /* _AAPL_AVLIMELKEY_H */ diff --git a/aapl/avliset.h b/aapl/avliset.h new file mode 100644 index 0000000..cf5be36 --- /dev/null +++ b/aapl/avliset.h @@ -0,0 +1,75 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLISET_H +#define _AAPL_AVLISET_H + +#include "compare.h" +#include "dlist.h" + +/** + * \addtogroup avlitree + * @{ + */ + +/** + * \class AvliSet + * \brief Linked Key-only oriented tree. + * + * AvliSet stores only keys in elements that are managed by the tree. 
AvliSet + * requires that a Key type and a class containing a compare() routine + * for Key be given. Items are inserted with just a key value. + * + * AvliSet assumes all elements in the tree are allocated on the heap and are + * to be managed by the tree. This means that the class destructor will delete + * the contents of the tree. A deep copy will cause existing elements to be + * deleted first. + * + * \include ex_avliset.cpp + */ + +/*@}*/ + +#define AVLTREE_SET +#define BASE_EL(name) name +#define BASEKEY(name) name +#define BASELIST DList< AvliSetEl<Key> > +#define AVLMEL_CLASSDEF class Key, class Compare = CmpOrd<Key> +#define AVLMEL_TEMPDEF class Key, class Compare +#define AVLMEL_TEMPUSE Key, Compare +#define AvlTree AvliSet +#define Element AvliSetEl<Key> +#define WALKABLE + +#include "avlcommon.h" + +#undef AVLTREE_SET +#undef BASE_EL +#undef BASEKEY +#undef BASELIST +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef Element +#undef WALKABLE + +#endif /* _AAPL_AVLISET_H */ diff --git a/aapl/avlitree.h b/aapl/avlitree.h new file mode 100644 index 0000000..b053c96 --- /dev/null +++ b/aapl/avlitree.h @@ -0,0 +1,78 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLITREE_H +#define _AAPL_AVLITREE_H + +#include "compare.h" +#include "dlistmel.h" + +/** + * \addtogroup avlitree + * @{ + */ + +/** + * \class AvliTree + * \brief Linked AVL tree. + * + * AvliTree is the standard linked by-structure AVL tree. To use this + * structure the user must define an element type and give it the necessary + * properties. At the very least it must have a getKey() function that will be + * used to compare the relative ordering of elements and tree management data + * necessary for the AVL algorithm. An element type can acquire the management + * data by inheriting the AvliTreeEl class. + * + * AvliTree does not presume to manage the allocation of elements in the tree. + * The destructor will not delete the items in the tree, instead the elements + * must be explicitly de-allocated by the user if necessary and when it is + * safe to do so. The empty() routine will traverse the tree and delete all + * items. + * + * Since the tree does not manage the elements, it can contain elements that + * are allocated statically or that are part of another data structure. 
+ * + * \include ex_avlitree.cpp + */ + +/*@}*/ + +#define BASE_EL(name) name +#define BASEKEY(name) name +#define BASELIST DListMel< Element, AvliTreeEl<Element> > +#define AVLMEL_CLASSDEF class Element, class Key, class Compare = CmpOrd<Key> +#define AVLMEL_TEMPDEF class Element, class Key, class Compare +#define AVLMEL_TEMPUSE Element, Key, Compare +#define AvlTree AvliTree +#define WALKABLE + +#include "avlcommon.h" + +#undef BASE_EL +#undef BASEKEY +#undef BASELIST +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef WALKABLE + +#endif /* _AAPL_AVLITREE_H */ diff --git a/aapl/avlkeyless.h b/aapl/avlkeyless.h new file mode 100644 index 0000000..3080513 --- /dev/null +++ b/aapl/avlkeyless.h @@ -0,0 +1,58 @@ +/* + * Copyright 2002, 2003 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLKEYLESS_H +#define _AAPL_AVLKEYLESS_H + +#include "compare.h" + +/** + * \addtogroup avltree + * @{ + */ + +/** + * \class AvlKeyless + * \brief AVL tree that has no insert/find/remove functions that take a key. + * + * AvlKeyless is an implementation of the AVL tree rebalancing functionality + * only. It provides the common code for the tiny AVL tree implementations. 
+ */ + +/*@}*/ + +#define BASE_EL(name) name +#define AVLMEL_CLASSDEF class Element +#define AVLMEL_TEMPDEF class Element +#define AVLMEL_TEMPUSE Element +#define AvlTree AvlKeyless +#define AVL_KEYLESS + +#include "avlcommon.h" + +#undef BASE_EL +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef AVL_KEYLESS + +#endif /* _AAPL_AVLKEYLESS_H */ diff --git a/aapl/avlmap.h b/aapl/avlmap.h new file mode 100644 index 0000000..e4e1566 --- /dev/null +++ b/aapl/avlmap.h @@ -0,0 +1,74 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLMAP_H +#define _AAPL_AVLMAP_H + +#include "compare.h" + +/** + * \addtogroup avltree + * @{ + */ + +/** + * \class AvlMap + * \brief Key and value oriented AVL tree. + * + * AvlMap stores key and value pairs in elements that are managed by the tree. It + * is intended to be similar to the map template found in the STL. AvlMap + * requires that a Key type, a Value type, and a class containing a compare() + * routine for Key be given. Items can be inserted with just a key or with a + * key and value pair. + * + * AvlMap assumes all elements in the tree are allocated on the heap and are + * to be managed by the tree. 
This means that the class destructor will delete + * the contents of the tree. A deep copy will cause existing elements to be + * deleted first. + * + * \include ex_avlmap.cpp + */ + +/*@}*/ + +#define AVLTREE_MAP +#define BASE_EL(name) name +#define BASEKEY(name) name +#define AVLMEL_CLASSDEF class Key, class Value, class Compare = CmpOrd<Key> +#define AVLMEL_TEMPDEF class Key, class Value, class Compare +#define AVLMEL_TEMPUSE Key, Value, Compare +#define AvlTree AvlMap +#define Element AvlMapEl<Key,Value> + +#include "avlcommon.h" + +#undef AVLTREE_MAP +#undef BASE_EL +#undef BASEKEY +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef Element + + + +#endif /* _AAPL_AVLMAP_H */ diff --git a/aapl/avlmel.h b/aapl/avlmel.h new file mode 100644 index 0000000..7bfad3b --- /dev/null +++ b/aapl/avlmel.h @@ -0,0 +1,74 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLMEL_H +#define _AAPL_AVLMEL_H + +#include "compare.h" + +/** + * \addtogroup avltree + * @{ + */ + +/** + * \class AvlMel + * \brief AVL tree for elements appearing in multiple trees. 
+ * + * AvlMel allows for an element to simultaneously be in multiple trees without + * the trees interfering with one another. For each tree that the element is + * to appear in, there must be a distinct set of AVL Tree management data that + * can be unambiguously referenced with some base class name. This name + * is passed to the tree as a template parameter and is used in the tree + * algorithms. + * + * The element must use the same key type and value in each tree that it + * appears in. If distinct keys are required, the AvlMelKey structure is + * available. + * + * AvlMel does not assume ownership of elements in the tree. The destructor + * will not delete the elements. If the user wishes to explicitly deallocate + * all the items in the tree the empty() routine is available. + * + * \include ex_avlmel.cpp + */ + +/*@}*/ + +#define BASE_EL(name) BaseEl::name +#define BASEKEY(name) name +#define AVLMEL_CLASSDEF class Element, class Key, \ + class BaseEl, class Compare = CmpOrd<Key> +#define AVLMEL_TEMPDEF class Element, class Key, \ + class BaseEl, class Compare +#define AVLMEL_TEMPUSE Element, Key, BaseEl, Compare +#define AvlTree AvlMel + +#include "avlcommon.h" + +#undef BASE_EL +#undef BASEKEY +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree + +#endif /* _AAPL_AVLMEL_H */ diff --git a/aapl/avlmelkey.h b/aapl/avlmelkey.h new file mode 100644 index 0000000..9261cc8 --- /dev/null +++ b/aapl/avlmelkey.h @@ -0,0 +1,71 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. 
+ * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLMELKEY_H +#define _AAPL_AVLMELKEY_H + +#include "compare.h" + +/** + * \addtogroup avltree + * @{ + */ + +/** + * \class AvlMelKey + * \brief AVL tree for elements appearing in multiple trees with different keys. + * + * AvlMelKey is similar to AvlMel, except that an additional template + * parameter, BaseKey, is provided for resolving ambiguous references to + * getKey(). This means that if an element is stored in multiple trees, each + * tree can use a different key for ordering the elements in it. Using + * AvlMelKey an array of data structures can be indexed with an O(log(n)) + * search on two or more of the values contained within it and without + * allocating any additional data. + * + * AvlMelKey does not assume ownership of elements in the tree. The destructor + * will not delete the elements. If the user wishes to explicitly deallocate + * all the items in the tree the empty() routine is available. 
+ * + * \include ex_avlmelkey.cpp + */ + +/*@}*/ + +#define BASE_EL(name) BaseEl::name +#define BASEKEY(name) BaseKey::name +#define AVLMEL_CLASSDEF class Element, class Key, class BaseEl, \ + class BaseKey, class Compare = CmpOrd<Key> +#define AVLMEL_TEMPDEF class Element, class Key, class BaseEl, \ + class BaseKey, class Compare +#define AVLMEL_TEMPUSE Element, Key, BaseEl, BaseKey, Compare +#define AvlTree AvlMelKey + +#include "avlcommon.h" + +#undef BASE_EL +#undef BASEKEY +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree + +#endif /* _AAPL_AVLMELKEY_H */ diff --git a/aapl/avlset.h b/aapl/avlset.h new file mode 100644 index 0000000..224ee59 --- /dev/null +++ b/aapl/avlset.h @@ -0,0 +1,70 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLSET_H +#define _AAPL_AVLSET_H + +#include "compare.h" + +/** + * \addtogroup avltree + * @{ + */ + +/** + * \class AvlSet + * \brief Key-only oriented tree. + * + * AvlSet stores only keys in elements that are managed by the tree. AvlSet + * requires that a Key type and a class containing a compare() routine + * for Key be given. Items are inserted with just a key value. 
+ * + * AvlSet assumes all elements in the tree are allocated on the heap and are + * to be managed by the tree. This means that the class destructor will delete + * the contents of the tree. A deep copy will cause existing elements to be + * deleted first. + * + * \include ex_avlset.cpp + */ + +/*@}*/ + +#define AVLTREE_SET +#define BASE_EL(name) name +#define BASEKEY(name) name +#define AVLMEL_CLASSDEF class Key, class Compare = CmpOrd<Key> +#define AVLMEL_TEMPDEF class Key, class Compare +#define AVLMEL_TEMPUSE Key, Compare +#define AvlTree AvlSet +#define Element AvlSetEl<Key> + +#include "avlcommon.h" + +#undef AVLTREE_SET +#undef BASE_EL +#undef BASEKEY +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef Element + +#endif /* _AAPL_AVLSET_H */ diff --git a/aapl/avltree.h b/aapl/avltree.h new file mode 100644 index 0000000..cf15359 --- /dev/null +++ b/aapl/avltree.h @@ -0,0 +1,73 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLTREE_H +#define _AAPL_AVLTREE_H + +#include "compare.h" + +/** + * \addtogroup avltree + * @{ + */ + +/** + * \class AvlTree + * \brief Basic AVL tree. 
+ * + * AvlTree is the standard by-structure AVL tree. To use this structure the + * user must define an element type and give it the necessary properties. At + * the very least it must have a getKey() function that will be used to + * compare the relative ordering of elements and tree management data + * necessary for the AVL algorithm. An element type can acquire the management + * data by inheriting the AvlTreeEl class. + * + * AvlTree does not presume to manage the allocation of elements in the tree. + * The destructor will not delete the items in the tree, instead the elements + * must be explicitly de-allocated by the user if necessary and when it is + * safe to do so. The empty() routine will traverse the tree and delete all + * items. + * + * Since the tree does not manage the elements, it can contain elements that + * are allocated statically or that are part of another data structure. + * + * \include ex_avltree.cpp + */ + +/*@}*/ + +#define BASE_EL(name) name +#define BASEKEY(name) name +#define AVLMEL_CLASSDEF class Element, class Key, class Compare = CmpOrd<Key> +#define AVLMEL_TEMPDEF class Element, class Key, class Compare +#define AVLMEL_TEMPUSE Element, Key, Compare +#define AvlTree AvlTree + +#include "avlcommon.h" + +#undef BASE_EL +#undef BASEKEY +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree + +#endif /* _AAPL_AVLTREE_H */ diff --git a/aapl/bstcommon.h b/aapl/bstcommon.h new file mode 100644 index 0000000..bd390cd --- /dev/null +++ b/aapl/bstcommon.h @@ -0,0 +1,814 @@ +/* + * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. 
+ * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* This header is not wrapped in ifndefs because it is + * not intended to be included by users directly. */ + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/* Binary Search Table */ +template < BST_TEMPL_DECLARE > class BstTable : + public Compare, + public Vector< Element, Resize > +{ + typedef Vector<Element, Resize> BaseVector; + typedef Table<Element> BaseTable; + +public: + /** + * \brief Default constructor. + * + * Create an empty binary search table. + */ + BstTable() { } + + /** + * \brief Construct with initial value. + * + * Constructs a binary search table with an initial item. Uses the default + * constructor for initializing Value. + */ + BstTable(const Key &key) + { insert(key); } + +#if defined( BSTMAP ) + /** + * \brief Construct with initial value. + * + * Constructs a binary search table with an initial key/value pair. + */ + BstTable(const Key &key, const Value &val) + { insert(key, val); } +#endif + +#if ! defined( BSTSET ) + /** + * \brief Construct with initial value. + * + * Constructs a binary search table with an initial Element. + */ + BstTable(const Element &el) + { insert(el); } +#endif + + Element *insert(const Key &key, Element **lastFound = 0); + Element *insertMulti(const Key &key); + + bool insert(const BstTable &other); + void insertMulti(const BstTable &other); + +#if defined( BSTMAP ) + Element *insert(const Key &key, const Value &val, + Element **lastFound = 0); + Element *insertMulti(const Key &key, const Value &val ); +#endif + +#if ! 
defined( BSTSET ) + Element *insert(const Element &el, Element **lastFound = 0); + Element *insertMulti(const Element &el); +#endif + + Element *find(const Key &key, Element **lastFound = 0) const; + bool findMulti( const Key &key, Element *&lower, + Element *&upper ) const; + + bool remove(const Key &key); + bool remove(Element *item); + long removeMulti(const Key &key); + long removeMulti(Element *lower, Element *upper); + + /* The following provide access to the underlying insert and remove + * functions that my be hidden by the BST insert and remove. The insertDup + * and insertNew functions will never be hidden. They are provided for + * consistency. The difference between the non-shared and the shared + * tables is the documentation reference to the invoked function. */ + +#if !defined( SHARED_BST ) + /*@{*/ + + /** \brief Call the insert of the underlying vector. + * + * Provides to access to the vector insert, which may become hidden. Care + * should be taken to ensure that after the insert the ordering of + * elements is preserved. + * Invokes Vector::insert( long pos, const T &val ). + */ + void vinsert(long pos, const Element &val) + { Vector< Element, Resize >::insert( pos, &val, 1 ); } + + /** \brief Call the insert of the underlying vector. + * + * Provides to access to the vector insert, which may become hidden. Care + * should be taken to ensure that after the insert the ordering of + * elements is preserved. + * Invokes Vector::insert( long pos, const T *val, long len ). + */ + void vinsert(long pos, const Element *val, long len) + { Vector< Element, Resize >::insert( pos, val, len ); } + + /** \brief Call the insert of the underlying vector. + * + * Provides to access to the vector insert, which may become hidden. Care + * should be taken to ensure that after the insert the ordering of + * elements is preserved. + * Invokes Vector::insert( long pos, const Vector &v ). 
+ */ + void vinsert(long pos, const BstTable &v) + { Vector< Element, Resize >::insert( pos, v.data, v.tabLen ); } + + /*@}*/ + + /*@{*/ + + /** \brief Call the remove of the underlying vector. + * + * Provides access to the vector remove, which may become hidden. + * Invokes Vector::remove( long pos ). + */ + void vremove(long pos) + { Vector< Element, Resize >::remove( pos, 1 ); } + + /** \brief Call the remove of the underlying vector. + * + * Proves access to the vector remove, which may become hidden. + * Invokes Vector::remove( long pos, long len ). + */ + void vremove(long pos, long len) + { Vector< Element, Resize >::remove( pos, len ); } + + /*@}*/ +#else /* SHARED_BST */ + /*@{*/ + + /** \brief Call the insert of the underlying vector. + * + * Provides to access to the vector insert, which may become hidden. Care + * should be taken to ensure that after the insert the ordering of + * elements is preserved. + * Invokes SVector::insert( long pos, const T &val ). + */ + void vinsert(long pos, const Element &val) + { Vector< Element, Resize >::insert( pos, &val, 1 ); } + + /** \brief Call the insert of the underlying vector. + * + * Provides to access to the vector insert, which may become hidden. Care + * should be taken to ensure that after the insert the ordering of + * elements is preserved. + * Invokes SVector::insert( long pos, const T *val, long len ). + */ + void vinsert(long pos, const Element *val, long len) + { Vector< Element, Resize >::insert( pos, val, len ); } + + /** \brief Call the insert of the underlying vector. + * + * Provides to access to the vector insert, which may become hidden. Care + * should be taken to ensure that after the insert the ordering of + * elements is preserved. + * Invokes SVector::insert( long pos, const SVector &v ). + */ + void vinsert(long pos, const BstTable &v) + { Vector< Element, Resize >::insert( pos, v.data, v.length() ); } + + /*@}*/ + + /*@{*/ + + /** \brief Call the remove of the underlying vector. 
+ * + * Provides access to the vector remove, which may become hidden. + * Invokes SVector::remove( long pos ). + */ + void vremove(long pos) + { Vector< Element, Resize >::remove( pos, 1 ); } + + /** \brief Call the remove of the underlying vector. + * + * Proves access to the vector remove, which may become hidden. + * Invokes SVector::remove( long pos, long len ). + */ + void vremove(long pos, long len) + { Vector< Element, Resize >::remove( pos, len ); } + + /*@}*/ + +#endif /* SHARED_BST */ +}; + + +#if 0 +#if defined( SHARED_BST ) +/** + * \brief Construct a binary search table with an initial amount of + * allocation. + * + * The table is initialized to have room for allocLength elements. The + * table starts empty. + */ +template <BST_TEMPL_DEF> BstTable<BST_TEMPL_USE>:: + BstTable( long allocLen ) +{ + /* Allocate the space if we are given a positive allocLen. */ + if ( allocLen > 0 ) { + /* Allocate the data needed. */ + STabHead *head = (STabHead*) + malloc( sizeof(STabHead) + sizeof(Element) * allocLen ); + if ( head == 0 ) + throw std::bad_alloc(); + + /* Set up the header and save the data pointer. */ + head->refCount = 1; + head->allocLen = allocLen; + head->tabLen = 0; + BaseTable::data = (Element*) (head + 1); + } +} +#else +/** + * \brief Construct a binary search table with an initial amount of + * allocation. + * + * The table is initialized to have room for allocLength elements. The + * table starts empty. + */ +template <BST_TEMPL_DEF> BstTable<BST_TEMPL_USE>:: + BstTable( long allocLen ) +{ + /* Allocate the space if we are given a positive allocLen. */ + BaseTable::allocLen = allocLen; + if ( BaseTable::allocLen > 0 ) { + BaseTable::data = (Element*) malloc(sizeof(Element) * BaseTable::allocLen); + if ( BaseTable::data == NULL ) + throw std::bad_alloc(); + } +} + +#endif +#endif + +/** + * \brief Find the element with the given key and remove it. 
+ * + * If multiple elements with the given key exist, then it is unspecified which + * element will be removed. + * + * \returns True if an element is found and consequently removed, false + * otherwise. + */ +template <BST_TEMPL_DEF> bool BstTable<BST_TEMPL_USE>:: + remove(const Key &key) +{ + Element *el = find(key); + if ( el != 0 ) { + Vector< Element >::remove(el - BaseTable::data); + return true; + } + return false; +} + +/** + * \brief Remove the element pointed to by item. + * + * If item does not point to an element in the tree, then undefined behaviour + * results. If item is null, then remove has no effect. + * + * \returns True if item is not null, false otherwise. + */ +template <BST_TEMPL_DEF> bool BstTable<BST_TEMPL_USE>:: + remove( Element *item ) +{ + if ( item != 0 ) { + Vector< Element >::remove(item - BaseTable::data); + return true; + } + return false; +} + +/** + * \brief Find and remove the entire range of elements with the given key. + * + * \returns The number of elements removed. + */ +template <BST_TEMPL_DEF> long BstTable<BST_TEMPL_USE>:: + removeMulti(const Key &key) +{ + Element *low, *high; + if ( findMulti(key, low, high) ) { + /* Get the length of the range. */ + long num = high - low + 1; + Vector< Element >::remove(low - BaseTable::data, num); + return num; + } + + return 0; +} + +template <BST_TEMPL_DEF> long BstTable<BST_TEMPL_USE>:: + removeMulti(Element *lower, Element *upper) +{ + /* Get the length of the range. */ + long num = upper - lower + 1; + Vector< Element >::remove(lower - BaseTable::data, num); + return num; +} + + +/** + * \brief Find a range of elements with the given key. + * + * If any elements with the given key exist then lower and upper are set to + * the low and high ends of the continous range of elements with the key. + * Lower and upper will point to the first and last elements with the key. + * + * \returns True if any elements are found, false otherwise. 
+ */ +template <BST_TEMPL_DEF> bool BstTable<BST_TEMPL_USE>:: + findMulti(const Key &key, Element *&low, Element *&high ) const +{ + const Element *lower, *mid, *upper; + long keyRelation; + const long tblLen = BaseTable::length(); + + if ( BaseTable::data == 0 ) + return false; + + lower = BaseTable::data; + upper = BaseTable::data + tblLen - 1; + while ( true ) { + if ( upper < lower ) { + /* Did not find the fd in the array. */ + return false; + } + + mid = lower + ((upper-lower)>>1); + keyRelation = compare(key, GET_KEY(*mid)); + + if ( keyRelation < 0 ) + upper = mid - 1; + else if ( keyRelation > 0 ) + lower = mid + 1; + else { + Element *lowEnd = BaseTable::data - 1; + Element *highEnd = BaseTable::data + tblLen; + + lower = mid - 1; + while ( lower != lowEnd && + compare(key, GET_KEY(*lower)) == 0 ) + lower--; + + upper = mid + 1; + while ( upper != highEnd && + compare(key, GET_KEY(*upper)) == 0 ) + upper++; + + low = (Element*)lower + 1; + high = (Element*)upper - 1; + return true; + } + } +} + +/** + * \brief Find an element with the given key. + * + * If the find succeeds then lastFound is set to the element found. If the + * find fails then lastFound is set the location where the key would be + * inserted. If there is more than one element in the tree with the given key, + * then it is unspecified which element is returned as the match. + * + * \returns The element found on success, null on failure. + */ +template <BST_TEMPL_DEF> Element *BstTable<BST_TEMPL_USE>:: + find( const Key &key, Element **lastFound ) const +{ + const Element *lower, *mid, *upper; + long keyRelation; + const long tblLen = BaseTable::length(); + + if ( BaseTable::data == 0 ) + return 0; + + lower = BaseTable::data; + upper = BaseTable::data + tblLen - 1; + while ( true ) { + if ( upper < lower ) { + /* Did not find the key. Last found gets the insert location. 
*/ + if ( lastFound != 0 ) + *lastFound = (Element*)lower; + return 0; + } + + mid = lower + ((upper-lower)>>1); + keyRelation = compare(key, GET_KEY(*mid)); + + if ( keyRelation < 0 ) + upper = mid - 1; + else if ( keyRelation > 0 ) + lower = mid + 1; + else { + /* Key is found. Last found gets the found record. */ + if ( lastFound != 0 ) + *lastFound = (Element*)mid; + return (Element*)mid; + } + } +} + +template <BST_TEMPL_DEF> Element *BstTable<BST_TEMPL_USE>:: + insert(const Key &key, Element **lastFound) +{ + const Element *lower, *mid, *upper; + long keyRelation, insertPos; + const long tblLen = BaseTable::length(); + + if ( tblLen == 0 ) { + /* If the table is empty then go straight to insert. */ + lower = BaseTable::data; + goto insert; + } + + lower = BaseTable::data; + upper = BaseTable::data + tblLen - 1; + while ( true ) { + if ( upper < lower ) { + /* Did not find the key in the array. + * Place to insert at is lower. */ + goto insert; + } + + mid = lower + ((upper-lower)>>1); + keyRelation = compare(key, GET_KEY(*mid)); + + if ( keyRelation < 0 ) + upper = mid - 1; + else if ( keyRelation > 0 ) + lower = mid + 1; + else { + if ( lastFound != 0 ) + *lastFound = (Element*)mid; + return 0; + } + } + +insert: + /* Get the insert pos. */ + insertPos = lower - BaseTable::data; + + /* Do the insert. After makeRawSpaceFor, lower pointer is no good. */ + BaseVector::makeRawSpaceFor(insertPos, 1); + new(BaseTable::data + insertPos) Element(key); + + /* Set lastFound */ + if ( lastFound != 0 ) + *lastFound = BaseTable::data + insertPos; + return BaseTable::data + insertPos; +} + + +template <BST_TEMPL_DEF> Element *BstTable<BST_TEMPL_USE>:: + insertMulti(const Key &key) +{ + const Element *lower, *mid, *upper; + long keyRelation, insertPos; + const long tblLen = BaseTable::length(); + + if ( tblLen == 0 ) { + /* If the table is empty then go straight to insert. 
*/ + lower = BaseTable::data; + goto insert; + } + + lower = BaseTable::data; + upper = BaseTable::data + tblLen - 1; + while ( true ) { + if ( upper < lower ) { + /* Did not find the key in the array. + * Place to insert at is lower. */ + goto insert; + } + + mid = lower + ((upper-lower)>>1); + keyRelation = compare(key, GET_KEY(*mid)); + + if ( keyRelation < 0 ) + upper = mid - 1; + else if ( keyRelation > 0 ) + lower = mid + 1; + else { + lower = mid; + goto insert; + } + } + +insert: + /* Get the insert pos. */ + insertPos = lower - BaseTable::data; + + /* Do the insert. */ + BaseVector::makeRawSpaceFor(insertPos, 1); + new(BaseTable::data + insertPos) Element(key); + + /* Return the element inserted. */ + return BaseTable::data + insertPos; +} + +/** + * \brief Insert each element from other. + * + * Always attempts to insert all elements even if the insert of some item from + * other fails. + * + * \returns True if all items inserted successfully, false if any insert + * failed. + */ +template <BST_TEMPL_DEF> bool BstTable<BST_TEMPL_USE>:: + insert(const BstTable &other) +{ + bool allSuccess = true; + long otherLen = other.length(); + for ( long i = 0; i < otherLen; i++ ) { + Element *el = insert( other.data[i] ); + if ( el == 0 ) + allSuccess = false; + } + return allSuccess; +} + +/** + * \brief Insert each element from other even if the elements exist already. + * + * No individual insertMulti can fail. + */ +template <BST_TEMPL_DEF> void BstTable<BST_TEMPL_USE>:: + insertMulti(const BstTable &other) +{ + long otherLen = other.length(); + for ( long i = 0; i < otherLen; i++ ) + insertMulti( other.data[i] ); +} + +#if ! defined( BSTSET ) + +/** + * \brief Insert the given element. + * + * If the key in the given element does not already exist in the table then a + * new element is inserted. They element copy constructor is used to place the + * element into the table. If lastFound is given, it is set to the new element + * created. 
If the insert fails then lastFound is set to the existing element + * of the same key. + * + * \returns The new element created upon success, null upon failure. + */ +template <BST_TEMPL_DEF> Element *BstTable<BST_TEMPL_USE>:: + insert(const Element &el, Element **lastFound ) +{ + const Element *lower, *mid, *upper; + long keyRelation, insertPos; + const long tblLen = BaseTable::length(); + + if ( tblLen == 0 ) { + /* If the table is empty then go straight to insert. */ + lower = BaseTable::data; + goto insert; + } + + lower = BaseTable::data; + upper = BaseTable::data + tblLen - 1; + while ( true ) { + if ( upper < lower ) { + /* Did not find the key in the array. + * Place to insert at is lower. */ + goto insert; + } + + mid = lower + ((upper-lower)>>1); + keyRelation = compare(GET_KEY(el), GET_KEY(*mid)); + + if ( keyRelation < 0 ) + upper = mid - 1; + else if ( keyRelation > 0 ) + lower = mid + 1; + else { + if ( lastFound != 0 ) + *lastFound = (Element*)mid; + return 0; + } + } + +insert: + /* Get the insert pos. */ + insertPos = lower - BaseTable::data; + + /* Do the insert. After makeRawSpaceFor, lower pointer is no good. */ + BaseVector::makeRawSpaceFor(insertPos, 1); + new(BaseTable::data + insertPos) Element(el); + + /* Set lastFound */ + if ( lastFound != 0 ) + *lastFound = BaseTable::data + insertPos; + return BaseTable::data + insertPos; +} + +/** + * \brief Insert the given element even if it exists already. + * + * If the key in the given element exists already then the new element is + * placed next to some other element of the same key. InsertMulti cannot fail. + * The element copy constructor is used to place the element in the table. + * + * \returns The new element created. 
+ */ +template <BST_TEMPL_DEF> Element *BstTable<BST_TEMPL_USE>:: + insertMulti(const Element &el) +{ + const Element *lower, *mid, *upper; + long keyRelation, insertPos; + const long tblLen = BaseTable::length(); + + if ( tblLen == 0 ) { + /* If the table is empty then go straight to insert. */ + lower = BaseTable::data; + goto insert; + } + + lower = BaseTable::data; + upper = BaseTable::data + tblLen - 1; + while ( true ) { + if ( upper < lower ) { + /* Did not find the fd in the array. + * Place to insert at is lower. */ + goto insert; + } + + mid = lower + ((upper-lower)>>1); + keyRelation = compare(GET_KEY(el), GET_KEY(*mid)); + + if ( keyRelation < 0 ) + upper = mid - 1; + else if ( keyRelation > 0 ) + lower = mid + 1; + else { + lower = mid; + goto insert; + } + } + +insert: + /* Get the insert pos. */ + insertPos = lower - BaseTable::data; + + /* Do the insert. */ + BaseVector::makeRawSpaceFor(insertPos, 1); + new(BaseTable::data + insertPos) Element(el); + + /* Return the element inserted. */ + return BaseTable::data + insertPos; +} +#endif + + +#if defined( BSTMAP ) + +/** + * \brief Insert the given key-value pair. + * + * If the given key does not already exist in the table then the key-value + * pair is inserted. Copy constructors are used to place the pair in the + * table. If lastFound is given, it is set to the new entry created. If the + * insert fails then lastFound is set to the existing pair of the same key. + * + * \returns The new element created upon success, null upon failure. + */ +template <BST_TEMPL_DEF> Element *BstTable<BST_TEMPL_USE>:: + insert(const Key &key, const Value &val, Element **lastFound) +{ + const Element *lower, *mid, *upper; + long keyRelation, insertPos; + const long tblLen = BaseTable::length(); + + if ( tblLen == 0 ) { + /* If the table is empty then go straight to insert. 
*/ + lower = BaseTable::data; + goto insert; + } + + lower = BaseTable::data; + upper = BaseTable::data + tblLen - 1; + while ( true ) { + if ( upper < lower ) { + /* Did not find the fd in the array. + * Place to insert at is lower. */ + goto insert; + } + + mid = lower + ((upper-lower)>>1); + keyRelation = Compare::compare(key, mid->key); + + if ( keyRelation < 0 ) + upper = mid - 1; + else if ( keyRelation > 0 ) + lower = mid + 1; + else { + if ( lastFound != NULL ) + *lastFound = (Element*)mid; + return 0; + } + } + +insert: + /* Get the insert pos. */ + insertPos = lower - BaseTable::data; + + /* Do the insert. */ + BaseVector::makeRawSpaceFor(insertPos, 1); + new(BaseTable::data + insertPos) Element(key, val); + + /* Set lastFound */ + if ( lastFound != NULL ) + *lastFound = BaseTable::data + insertPos; + return BaseTable::data + insertPos; +} + + +/** + * \brief Insert the given key-value pair even if the key exists already. + * + * If the key exists already then the key-value pair is placed next to some + * other pair of the same key. InsertMulti cannot fail. Copy constructors are + * used to place the pair in the table. + * + * \returns The new element created. + */ +template <BST_TEMPL_DEF> Element *BstTable<BST_TEMPL_USE>:: + insertMulti(const Key &key, const Value &val) +{ + const Element *lower, *mid, *upper; + long keyRelation, insertPos; + const long tblLen = BaseTable::length(); + + if ( tblLen == 0 ) { + /* If the table is empty then go straight to insert. */ + lower = BaseTable::data; + goto insert; + } + + lower = BaseTable::data; + upper = BaseTable::data + tblLen - 1; + while ( true ) { + if ( upper < lower ) { + /* Did not find the key in the array. + * Place to insert at is lower. 
*/ + goto insert; + } + + mid = lower + ((upper-lower)>>1); + keyRelation = Compare::compare(key, mid->key); + + if ( keyRelation < 0 ) + upper = mid - 1; + else if ( keyRelation > 0 ) + lower = mid + 1; + else { + lower = mid; + goto insert; + } + } + +insert: + /* Get the insert pos. */ + insertPos = lower - BaseTable::data; + + /* Do the insert. */ + BaseVector::makeRawSpaceFor(insertPos, 1); + new(BaseTable::data + insertPos) Element(key, val); + + /* Return the element inserted. */ + return BaseTable::data + insertPos; +} + +#endif + +#ifdef AAPL_NAMESPACE +} +#endif diff --git a/aapl/bstmap.h b/aapl/bstmap.h new file mode 100644 index 0000000..5154b86 --- /dev/null +++ b/aapl/bstmap.h @@ -0,0 +1,113 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_BSTMAP_H +#define _AAPL_BSTMAP_H + +#include "compare.h" +#include "vector.h" + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \brief Element for BstMap. + * + * Stores the key and value pair. 
+ */ +template <class Key, class Value> struct BstMapEl +{ + BstMapEl() {} + BstMapEl(const Key &key) : key(key) {} + BstMapEl(const Key &key, const Value &val) : key(key), value(val) {} + + /** \brief The key */ + Key key; + + /** \brief The value. */ + Value value; +}; + +#ifdef AAPL_NAMESPACE +} +#endif + +/** + * \addtogroup bst + * @{ + */ + +/** + * \class BstMap + * \brief Binary search table for key and value pairs. + * + * BstMap stores key and value pairs in each element. The key and value can be + * any type. A compare class for the key must be supplied. + */ + +/*@}*/ + +#define BST_TEMPL_DECLARE class Key, class Value, \ + class Compare = CmpOrd<Key>, class Resize = ResizeExpn +#define BST_TEMPL_DEF class Key, class Value, class Compare, class Resize +#define BST_TEMPL_USE Key, Value, Compare, Resize +#define GET_KEY(el) ((el).key) +#define BstTable BstMap +#define Element BstMapEl<Key, Value> +#define BSTMAP + +#include "bstcommon.h" + +#undef BST_TEMPL_DECLARE +#undef BST_TEMPL_DEF +#undef BST_TEMPL_USE +#undef GET_KEY +#undef BstTable +#undef Element +#undef BSTMAP + +/** + * \fn BstMap::insert(const Key &key, BstMapEl<Key, Value> **lastFound) + * \brief Insert the given key. + * + * If the given key does not already exist in the table then a new element + * having key is inserted. They key copy constructor and value default + * constructor are used to place the pair in the table. If lastFound is given, + * it is set to the new entry created. If the insert fails then lastFound is + * set to the existing pair of the same key. + * + * \returns The new element created upon success, null upon failure. + */ + +/** + * \fn BstMap::insertMulti(const Key &key) + * \brief Insert the given key even if it exists already. + * + * If the key exists already then the new element having key is placed next + * to some other pair of the same key. InsertMulti cannot fail. 
The key copy + * constructor and the value default constructor are used to place the pair in + * the table. + * + * \returns The new element created. + */ + +#endif /* _AAPL_BSTMAP_H */ diff --git a/aapl/bstset.h b/aapl/bstset.h new file mode 100644 index 0000000..ce710ee --- /dev/null +++ b/aapl/bstset.h @@ -0,0 +1,86 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_BSTSET_H +#define _AAPL_BSTSET_H + +/** + * \addtogroup bst + * @{ + */ + +/** + * \class BstSet + * \brief Binary search table for types that are the key. + * + * BstSet is suitable for types that comprise the entire key. Rather than look + * into the element to retrieve the key, the element is the key. A class that + * contains a comparison routine for the key must be given. 
+ */ + +/*@}*/ + +#include "compare.h" +#include "vector.h" + +#define BST_TEMPL_DECLARE class Key, class Compare = CmpOrd<Key>, \ + class Resize = ResizeExpn +#define BST_TEMPL_DEF class Key, class Compare, class Resize +#define BST_TEMPL_USE Key, Compare, Resize +#define GET_KEY(el) (el) +#define BstTable BstSet +#define Element Key +#define BSTSET + +#include "bstcommon.h" + +#undef BST_TEMPL_DECLARE +#undef BST_TEMPL_DEF +#undef BST_TEMPL_USE +#undef GET_KEY +#undef BstTable +#undef Element +#undef BSTSET + +/** + * \fn BstSet::insert(const Key &key, Key **lastFound) + * \brief Insert the given key. + * + * If the given key does not already exist in the table then it is inserted. + * The key's copy constructor is used to place the item in the table. If + * lastFound is given, it is set to the new entry created. If the insert fails + * then lastFound is set to the existing key of the same value. + * + * \returns The new element created upon success, null upon failure. + */ + +/** + * \fn BstSet::insertMulti(const Key &key) + * \brief Insert the given key even if it exists already. + * + * If the key exists already then it is placed next to some other key of the + * same value. InsertMulti cannot fail. The key's copy constructor is used to + * place the item in the table. + * + * \returns The new element created. + */ + +#endif /* _AAPL_BSTSET_H */ diff --git a/aapl/bsttable.h b/aapl/bsttable.h new file mode 100644 index 0000000..9898ebf --- /dev/null +++ b/aapl/bsttable.h @@ -0,0 +1,84 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. 
+ * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_BSTTABLE_H +#define _AAPL_BSTTABLE_H + +#include "compare.h" +#include "vector.h" + +/** + * \addtogroup bst + * @{ + */ + +/** + * \class BstTable + * \brief Binary search table for structures that contain a key. + * + * This is the basic binary search table. It can be used to contain a + * structure that has a key and possibly some data. The key should be a member + * of the element class and accessible with getKey(). A class containing the + * compare routine must be supplied. + */ + +/*@}*/ + +#define BST_TEMPL_DECLARE class Element, class Key, \ + class Compare = CmpOrd<Key>, class Resize = ResizeExpn +#define BST_TEMPL_DEF class Element, class Key, class Compare, class Resize +#define BST_TEMPL_USE Element, Key, Compare, Resize +#define GET_KEY(el) ((el).getKey()) +#define BSTTABLE + +#include "bstcommon.h" + +#undef BST_TEMPL_DECLARE +#undef BST_TEMPL_DEF +#undef BST_TEMPL_USE +#undef GET_KEY +#undef BSTTABLE + +/** + * \fn BstTable::insert(const Key &key, Element **lastFound) + * \brief Insert a new element with the given key. + * + * If the given key does not already exist in the table a new element is + * inserted with the given key. A constructor taking only const Key& is used + * to initialize the new element. If lastFound is given, it is set to the new + * element created. If the insert fails then lastFound is set to the existing + * element with the same key. + * + * \returns The new element created upon success, null upon failure. 
+ */ + +/** + * \fn BstTable::insertMulti(const Key &key) + * \brief Insert a new element even if the key exists already. + * + * If the key exists already then the new element is placed next to some + * element with the same key. InsertMulti cannot fail. A constructor taking + * only const Key& is used to initialize the new element. + * + * \returns The new element created. + */ + +#endif /* _AAPL_BSTTABLE_H */ diff --git a/aapl/bubblesort.h b/aapl/bubblesort.h new file mode 100644 index 0000000..20e0f6f --- /dev/null +++ b/aapl/bubblesort.h @@ -0,0 +1,94 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_BUBBLESORT_H +#define _AAPL_BUBBLESORT_H + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \addtogroup sort + * @{ + */ + +/** + * \class BubbleSort + * \brief Bubble sort an array of data. + * + * BubbleSort can be used to sort any array of objects of type T provided a + * compare class is given. BubbleSort is in-place. It does not require any + * temporary storage. + * + * Objects are not made aware that they are being moved around in memory. + * Assignment operators, constructors and destructors are never invoked by the + * sort. 
+ * + * BubbleSort runs in O(n^2) time. It is most useful when sorting arrays that + * are nearly sorted. It is best when neighbouring pairs are out of place. + * BubbleSort is a stable sort, meaning that objects with the same key have + * their relative ordering preserved. + */ + +/*@}*/ + +/* BubbleSort. */ +template <class T, class Compare> class BubbleSort + : public Compare +{ +public: + /* Sorting interface routine. */ + void sort(T *data, long len); +}; + + +/** + * \brief Bubble sort an array of data. + */ +template <class T, class Compare> void BubbleSort<T,Compare>:: + sort(T *data, long len) +{ + bool changed = true; + for ( long pass = 1; changed && pass < len; pass ++ ) { + changed = false; + for ( long i = 0; i < len-pass; i++ ) { + /* Do we swap pos with the next one? */ + if ( compare( data[i], data[i+1] ) > 0 ) { + char tmp[sizeof(T)]; + + /* Swap the two items. */ + memcpy( tmp, data+i, sizeof(T) ); + memcpy( data+i, data+i+1, sizeof(T) ); + memcpy( data+i+1, tmp, sizeof(T) ); + + /* Note that we made a change. */ + changed = true; + } + } + } +} + +#ifdef AAPL_NAMESPACE +} +#endif + +#endif /* _AAPL_BUBBLESORT_H */ diff --git a/aapl/compare.h b/aapl/compare.h new file mode 100644 index 0000000..e537736 --- /dev/null +++ b/aapl/compare.h @@ -0,0 +1,260 @@ +/* + * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_COMPARE_H +#define _AAPL_COMPARE_H + +#include <string.h> +#include "table.h" + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \defgroup compare Compare + * \brief Basic compare clases. + * + * Compare classes are used by data structures that need to know the relative + * ordering of elemets. To become a compare class, a class must imlement a + * routine long compare(const T &key1, const T &key2) that behaves just like + * strcmp. + * + * Compare classes are passed to the template data structure as a template + * parameter and are inherited. In most cases the compare routine will base + * the key comparision only on the two keys and the compare routine can + * therefore be static. Though sometimes it is useful to include data in the + * compare class and use this data in the comparison. For example the compare + * class may contain a pointer to some other data structure to which the + * comparison is delegated. + * + * @{ + */ + +/** + * \brief Compare two null terminated character sequences. + * + * This comparision class is a wrapper for strcmp. + */ +struct CmpStr +{ + /** + * \brief Compare two null terminated string types. + */ + static inline long compare(const char *k1, const char *k2) + { return strcmp(k1, k2); } +}; + +/** + * \brief Compare a type for which < and > are implemented. + * + * CmpOrd is suitable for simple types such as integers and pointers that by + * default have the less-than and greater-than operators defined. + */ +template <class T> struct CmpOrd +{ + /** + * \brief Compare two ordinal types. + * + * This compare routine copies its arguements in by value. 
+ */ + static inline long compare(const T k1, const T k2) + { + if (k1 < k2) + return -1; + else if (k1 > k2) + return 1; + else + return 0; + } +}; + +/** + * \brief Compare two tables of type T + * + * Table comparison is useful for keying a data structure on a vector or + * binary search table. T is the element type stored in the table. + * CompareT is the comparison structure used to compare the individual values + * in the table. + */ +template < class T, class CompareT = CmpOrd<T> > struct CmpTable + : public CompareT +{ + /** + * \brief Compare two tables storing type T. + */ + static inline long compare(const Table<T> &t1, const Table<T> &t2) + { + if ( t1.tabLen < t2.tabLen ) + return -1; + else if ( t1.tabLen > t2.tabLen ) + return 1; + else + { + T *i1 = t1.data, *i2 = t2.data; + long len = t1.tabLen, cmpResult; + for ( long pos = 0; pos < len; + pos += 1, i1 += 1, i2 += 1 ) + { + cmpResult = CompareT::compare(*i1, *i2); + if ( cmpResult != 0 ) + return cmpResult; + } + return 0; + } + } +}; + +/** + * \brief Compare two tables of type T -- non-static version. + * + * CmpTableNs is identical to CmpTable, however the compare routine is + * non-static. If the CompareT class contains a non-static compare, then this + * version must be used because a static member cannot invoke a non-static + * member. + * + * Table comparison is useful for keying a data structure on a vector or binary + * search table. T is the element type stored in the table. CompareT + * is the comparison structure used to compare the individual values in the + * table. + */ +template < class T, class CompareT = CmpOrd<T> > struct CmpTableNs + : public CompareT +{ + /** + * \brief Compare two tables storing type T. 
+ */ + inline long compare(const Table<T> &t1, const Table<T> &t2) + { + if ( t1.tabLen < t2.tabLen ) + return -1; + else if ( t1.tabLen > t2.tabLen ) + return 1; + else + { + T *i1 = t1.data, *i2 = t2.data; + long len = t1.tabLen, cmpResult; + for ( long pos = 0; pos < len; + pos += 1, i1 += 1, i2 += 1 ) + { + cmpResult = CompareT::compare(*i1, *i2); + if ( cmpResult != 0 ) + return cmpResult; + } + return 0; + } + } +}; + +/** + * \brief Compare two implicitly shared tables of type T + * + * This table comparison is for data structures based on implicitly + * shared tables. + * + * Table comparison is useful for keying a data structure on a vector or + * binary search table. T is the element type stored in the table. + * CompareT is the comparison structure used to compare the individual values + * in the table. + */ +template < class T, class CompareT = CmpOrd<T> > struct CmpSTable : public CompareT +{ + /** + * \brief Compare two tables storing type T. + */ + static inline long compare(const STable<T> &t1, const STable<T> &t2) + { + long t1Length = t1.length(); + long t2Length = t2.length(); + + /* Compare lengths. */ + if ( t1Length < t2Length ) + return -1; + else if ( t1Length > t2Length ) + return 1; + else { + /* Compare the table data. */ + T *i1 = t1.data, *i2 = t2.data; + for ( long pos = 0; pos < t1Length; + pos += 1, i1 += 1, i2 += 1 ) + { + long cmpResult = CompareT::compare(*i1, *i2); + if ( cmpResult != 0 ) + return cmpResult; + } + return 0; + } + } +}; + +/** + * \brief Compare two implicitly shared tables of type T -- non-static + * version. + * + * This is a non-static table comparison for data structures based on + * implicitly shared tables. If the CompareT class contains a non-static + * compare, then this version must be used because a static member cannot + * invoke a non-static member. + * + * Table comparison is useful for keying a data structure on a vector or + * binary search table. T is the element type stored in the table. 
+ * CompareT is the comparison structure used to compare the individual values + * in the table. + */ +template < class T, class CompareT = CmpOrd<T> > struct CmpSTableNs + : public CompareT +{ + /** + * \brief Compare two tables storing type T. + */ + inline long compare(const STable<T> &t1, const STable<T> &t2) + { + long t1Length = t1.length(); + long t2Length = t2.length(); + + /* Compare lengths. */ + if ( t1Length < t2Length ) + return -1; + else if ( t1Length > t2Length ) + return 1; + else { + /* Compare the table data. */ + T *i1 = t1.data, *i2 = t2.data; + for ( long pos = 0; pos < t1Length; + pos += 1, i1 += 1, i2 += 1 ) + { + long cmpResult = CompareT::compare(*i1, *i2); + if ( cmpResult != 0 ) + return cmpResult; + } + return 0; + } + } +}; + + +/*@}*/ + +#ifdef AAPL_NAMESPACE +} +#endif + +#endif /* _AAPL_COMPARE_H */ diff --git a/aapl/dlcommon.h b/aapl/dlcommon.h new file mode 100644 index 0000000..5ce9bd3 --- /dev/null +++ b/aapl/dlcommon.h @@ -0,0 +1,790 @@ +/* + * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* This header is not wrapped in ifndef becuase it is not intended to + * be included by the user. 
*/
+
+#ifdef AAPL_NAMESPACE
+namespace Aapl {
+#endif
+
+#if defined( DOUBLELIST_VALUE )
+/**
+ * \brief Double list element for DListVal.
+ *
+ * DListValEl stores the type T of DListVal by value.
+ */
+template <class T> struct DListValEl
+{
+	/**
+	 * \brief Construct a DListValEl with a given value.
+	 *
+	 * The only constructor available initializes the value element. This
+	 * enforces that DListVal elements are never created without having their
+	 * value initialized by the user. T's copy constructor is used to copy the
+	 * value in.
+	 */
+	DListValEl( const T &val ) : value(val) { }
+
+	/**
+	 * \brief Value stored by the list element.
+	 *
+	 * Value is always copied into new list elements using the copy
+	 * constructor.
+	 */
+	T value;
+
+	/**
+	 * \brief List previous pointer.
+	 *
+	 * Points to the previous item in the list. If this is the first item in
+	 * the list, then prev is NULL. If this element is not in a list then
+	 * prev is undefined.
+	 */
+	DListValEl<T> *prev;
+
+	/**
+	 * \brief List next pointer.
+	 *
+	 * Points to the next item in the list. If this is the last item in the
+	 * list, then next is NULL. If this element is not in a list then next is
+	 * undefined.
+	 */
+	DListValEl<T> *next;
+};
+#else
+
+#ifndef __AAPL_DOUBLE_LIST_EL
+#define __AAPL_DOUBLE_LIST_EL
+/**
+ * \brief Double list element properties.
+ *
+ * This class can be inherited to make a class suitable to be a double list
+ * element. It simply provides the next and previous pointers. An alternative
+ * is to put the next and previous pointers in the class directly.
+ */
+template <class Element> struct DListEl
+{
+	/**
+	 * \brief List previous pointer.
+	 *
+	 * Points to the previous item in the list. If this is the first item in
+	 * the list, then prev is NULL. If this element is not in a list then
+	 * prev is undefined.
+	 */
+	Element *prev;
+
+	/**
+	 * \brief List next pointer.
+	 *
+	 * Points to the next item in the list. If this is the last item in the
+	 * list, then next is NULL. If this element is not in a list then next is
+	 * undefined.
+	 */
+	Element *next;
+};
+#endif /* __AAPL_DOUBLE_LIST_EL */
+
+#endif
+
+/* Doubly Linked List. The class name, its template parameters and the
+ * BASE_EL() accessor are macros that must be defined by whichever header
+ * includes this one. */
+template <DLMEL_TEMPDEF> class DList
+{
+public:
+	/** \brief Initialize an empty list. */
+	DList() : head(0), tail(0), listLen(0) {}
+
+	/**
+	 * \brief Perform a deep copy of the list.
+	 *
+	 * The elements of the other list are duplicated and put into this list.
+	 * Elements are copied using the copy constructor.
+	 */
+	DList(const DList &other);
+
+#ifdef DOUBLELIST_VALUE
+	/**
+	 * \brief Clear the double list contents.
+	 *
+	 * All elements are deleted.
+	 */
+	~DList() { empty(); }
+
+	/**
+	 * \brief Assign another list into this list using a deep copy.
+	 *
+	 * The elements of the other list are duplicated and put into this list.
+	 * Each list item is created using the copy constructor. If this list
+	 * contains any elements before the copy, they are deleted first.
+	 *
+	 * \returns A reference to this.
+	 */
+	DList &operator=(const DList &other);
+
+	/**
+	 * \brief Transfer the contents of another list into this list.
+	 *
+	 * The elements of the other list are moved in. The other list will be
+	 * empty afterwards. If this list contains any elements before the copy,
+	 * then they are deleted.
+	 */
+	void transfer(DList &other);
+#else
+	/**
+	 * \brief Abandon all elements in the list.
+	 *
+	 * List elements are not deleted.
+	 */
+	~DList() {}
+
+	/**
+	 * \brief Perform a deep copy of the list.
+	 *
+	 * The elements of the other list are duplicated and put into this list.
+	 * Each list item is created using the copy constructor. If this list
+	 * contains any elements before the copy, they are abandoned.
+	 *
+	 * \returns A reference to this.
+	 */
+	DList &operator=(const DList &other);
+
+	/**
+	 * \brief Transfer the contents of another list into this list.
+	 *
+	 * The elements of the other list are moved in. The other list will be
+	 * empty afterwards. If this list contains any elements before the copy,
+	 * they are abandoned.
+	 */
+	void transfer(DList &other);
+#endif
+
+
+#ifdef DOUBLELIST_VALUE
+	/**
+	 * \brief Make a new element and prepend it to the front of the list.
+	 *
+	 * The item is copied into the new element using the copy constructor.
+	 * Equivalent to list.addBefore(list.head, item).
+	 */
+	void prepend(const T &item);
+
+	/**
+	 * \brief Make a new element and append it to the end of the list.
+	 *
+	 * The item is copied into the new element using the copy constructor.
+	 * Equivalent to list.addAfter(list.tail, item).
+	 */
+	void append(const T &item);
+
+	/**
+	 * \brief Make a new element and insert it immediately after an element in
+	 * the list.
+	 *
+	 * The item is copied into the new element using the copy constructor. If
+	 * prev_el is NULL then the new element is prepended to the front of the
+	 * list. If prev_el is not already in the list then undefined behaviour
+	 * results. Equivalent to list.addAfter(prev_el, new DListValEl(item)).
+	 */
+	void addAfter(Element *prev_el, const T &item);
+
+	/**
+	 * \brief Make a new element and insert it immediately before an element
+	 * in the list.
+	 *
+	 * The item is copied into the new element using the copy constructor. If
+	 * next_el is NULL then the new element is appended to the end of the
+	 * list. If next_el is not already in the list then undefined behaviour
+	 * results. Equivalent to list.addBefore(next_el, new DListValEl(item)).
+	 */
+	void addBefore(Element *next_el, const T &item);
+#endif
+
+	/**
+	 * \brief Prepend a single element to the front of the list.
+	 *
+	 * If new_el is already an element of some list, then undefined behaviour
+	 * results. Equivalent to list.addBefore(list.head, new_el).
+	 */
+	void prepend(Element *new_el) { addBefore(head, new_el); }
+
+	/**
+	 * \brief Append a single element to the end of the list.
+	 *
+	 * If new_el is already an element of some list, then undefined behaviour
+	 * results. Equivalent to list.addAfter(list.tail, new_el).
+	 */
+	void append(Element *new_el) { addAfter(tail, new_el); }
+
+	/**
+	 * \brief Prepend an entire list to the beginning of this list.
+	 *
+	 * All items are moved, not copied. Afterwards, the other list is empty.
+	 * All items are prepended at once, so this is an O(1) operation.
+	 * Equivalent to list.addBefore(list.head, dl).
+	 */
+	void prepend(DList &dl) { addBefore(head, dl); }
+
+	/**
+	 * \brief Append an entire list to the end of the list.
+	 *
+	 * All items are moved, not copied. Afterwards, the other list is empty.
+	 * All items are appended at once, so this is an O(1) operation.
+	 * Equivalent to list.addAfter(list.tail, dl).
+	 */
+	void append(DList &dl) { addAfter(tail, dl); }
+
+	/* Insert a single element after/before an element of this list. A NULL
+	 * prev_el prepends; a NULL next_el appends. */
+	void addAfter(Element *prev_el, Element *new_el);
+	void addBefore(Element *next_el, Element *new_el);
+
+	/* Splice every element of dl in after/before an element of this list,
+	 * leaving dl empty. */
+	void addAfter(Element *prev_el, DList &dl);
+	void addBefore(Element *next_el, DList &dl);
+
+	/**
+	 * \brief Detach the head of the list
+	 *
+	 * The element detached is not deleted. If there is no head of the list
+	 * (the list is empty) then undefined behaviour results. Equivalent to
+	 * list.detach(list.head).
+	 *
+	 * \returns The element detached.
+	 */
+	Element *detachFirst() { return detach(head); }
+
+	/**
+	 * \brief Detach the tail of the list
+	 *
+	 * The element detached is not deleted. If there is no tail of the list
+	 * (the list is empty) then undefined behaviour results. Equivalent to
+	 * list.detach(list.tail).
+	 *
+	 * \returns The element detached.
+	 */
+	Element *detachLast() { return detach(tail); }
+
+	/* Detaches an element from the list. Does not free any memory. */
+	Element *detach(Element *el);
+
+	/**
+	 * \brief Detach and delete the first element in the list.
+	 *
+	 * If there is no first element (the list is empty) then undefined
+	 * behaviour results. Equivalent to delete list.detach(list.head);
+	 */
+	void removeFirst() { delete detach( head ); }
+
+	/**
+	 * \brief Detach and delete the last element in the list.
+	 *
+	 * If there is no last element (the list is empty) then undefined
+	 * behaviour results. Equivalent to delete list.detach(list.tail);
+	 */
+	void removeLast() { delete detach( tail ); }
+
+	/**
+	 * \brief Detach and delete an element from the list.
+	 *
+	 * If the element is not in the list, then undefined behaviour results.
+	 * Equivalent to delete list.detach(el);
+	 */
+	void remove(Element *el) { delete detach( el ); }
+
+	/* empty() deletes every element; abandon() forgets the elements without
+	 * deleting them. Both leave the list with no elements. */
+	void empty();
+	void abandon();
+
+	/** \brief The number of elements in the list. */
+	long length() const { return listLen; }
+
+	/** \brief Head and tail of the linked list. */
+	Element *head, *tail;
+
+	/** \brief The number of elements in the list. */
+	long listLen;
+
+	/* Convenience access. Returns the same value as length(). */
+	long size() const { return listLen; }
+
+	/* Forward this so a ref can be used. */
+	struct Iter;
+
+	/* Classes for setting the iterator. Each one only holds a reference to the
+	 * list or iterator that an Iter is to be positioned from. */
+	struct IterFirst { IterFirst( const DList &l ) : l(l) { } const DList &l; };
+	struct IterLast { IterLast( const DList &l ) : l(l) { } const DList &l; };
+	struct IterNext { IterNext( const Iter &i ) : i(i) { } const Iter &i; };
+	struct IterPrev { IterPrev( const Iter &i ) : i(i) { } const Iter &i; };
+
+	/**
+	 * \brief Double List Iterator.
+	 * \ingroup iterators
+	 */
+	struct Iter
+	{
+		/* Default construct. The iterator points at no element. */
+		Iter() : ptr(0) { }
+
+		/* Construct from a double list. A list or IterFirst starts at the head,
+		 * IterLast at the tail, IterNext/IterPrev at the neighbour of another
+		 * iterator's element (that iterator must point at an element). */
+		Iter( const DList &dl ) : ptr(dl.head) { }
+		Iter( Element *el ) : ptr(el) { }
+		Iter( const IterFirst &dlf ) : ptr(dlf.l.head) { }
+		Iter( const IterLast &dll ) : ptr(dll.l.tail) { }
+		Iter( const IterNext &dln ) : ptr(dln.i.ptr->BASE_EL(next)) { }
+		Iter( const IterPrev &dlp ) : ptr(dlp.i.ptr->BASE_EL(prev)) { }
+
+		/* Assign from a double list. Same positioning rules as the
+		 * constructors above. */
+		Iter &operator=( const DList &dl ) { ptr = dl.head; return *this; }
+		Iter &operator=( Element *el ) { ptr = el; return *this; }
+		Iter &operator=( const IterFirst &af ) { ptr = af.l.head; return *this; }
+		Iter &operator=( const IterLast &al ) { ptr = al.l.tail; return *this; }
+		Iter &operator=( const IterNext &an ) { ptr = an.i.ptr->BASE_EL(next); return *this; }
+		Iter &operator=( const IterPrev &ap ) { ptr = ap.i.ptr->BASE_EL(prev); return *this; }
+
+		/* Note: lte() and gtb() both test ptr != 0, and end() and beg() both
+		 * test ptr == 0. A null ptr stands for "off either end" of the list. */
+
+		/** \brief Less than end? */
+		bool lte() const { return ptr != 0; }
+
+		/** \brief At end? */
+		bool end() const { return ptr == 0; }
+
+		/** \brief Greater than beginning? */
+		bool gtb() const { return ptr != 0; }
+
+		/** \brief At beginning? */
+		bool beg() const { return ptr == 0; }
+
+		/** \brief At first element? */
+		bool first() const { return ptr && ptr->BASE_EL(prev) == 0; }
+
+		/** \brief At last element? */
+		bool last() const { return ptr && ptr->BASE_EL(next) == 0; }
+
+		/** \brief Implicit cast to Element*. */
+		operator Element*() const { return ptr; }
+
+		/** \brief Dereference operator returns Element&. */
+		Element &operator *() const { return *ptr; }
+
+		/** \brief Arrow operator returns Element*. */
+		Element *operator->() const { return ptr; }
+
+		/** \brief Move to next item. */
+		inline Element *operator++() { return ptr = ptr->BASE_EL(next); }
+
+		/** \brief Move to next item. */
+		inline Element *increment() { return ptr = ptr->BASE_EL(next); }
+
+		/** \brief Move to next item. */
+		inline Element *operator++(int);
+
+		/** \brief Move to previous item. */
+		inline Element *operator--() { return ptr = ptr->BASE_EL(prev); }
+
+		/** \brief Move to previous item. */
+		inline Element *decrement() { return ptr = ptr->BASE_EL(prev); }
+
+		/** \brief Move to previous item. */
+		inline Element *operator--(int);
+
+		/** \brief Return the next item. Does not modify this. */
+		inline IterNext next() const { return IterNext(*this); }
+
+		/** \brief Return the prev item. Does not modify this.
*/ + inline IterPrev prev() const { return IterPrev(*this); } + + /** \brief The iterator is simply a pointer. */ + Element *ptr; + }; + + /** \brief Return first element. */ + IterFirst first() { return IterFirst(*this); } + + /** \brief Return last element. */ + IterLast last() { return IterLast(*this); } +}; + +/* Copy constructor, does a deep copy of other. */ +template <DLMEL_TEMPDEF> DList<DLMEL_TEMPUSE>:: + DList(const DList<DLMEL_TEMPUSE> &other) : + head(0), tail(0), listLen(0) +{ + Element *el = other.head; + while( el != 0 ) { + append( new Element(*el) ); + el = el->BASE_EL(next); + } +} + +#ifdef DOUBLELIST_VALUE + +/* Assignement operator does deep copy. */ +template <DLMEL_TEMPDEF> DList<DLMEL_TEMPUSE> &DList<DLMEL_TEMPUSE>:: + operator=(const DList &other) +{ + /* Free the old list. The value list assumes items were allocated on the + * heap by itself. */ + empty(); + + Element *el = other.head; + while( el != 0 ) { + append( new Element(*el) ); + el = el->BASE_EL(next); + } + return *this; +} + +template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>:: + transfer(DList &other) +{ + /* Free the old list. The value list assumes items were allocated on the + * heap by itself. */ + empty(); + + head = other.head; + tail = other.tail; + listLen = other.listLen; + + other.abandon(); +} + +#else + +/* Assignement operator does deep copy. */ +template <DLMEL_TEMPDEF> DList<DLMEL_TEMPUSE> &DList<DLMEL_TEMPUSE>:: + operator=(const DList &other) +{ + Element *el = other.head; + while( el != 0 ) { + append( new Element(*el) ); + el = el->BASE_EL(next); + } + return *this; +} + +template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>:: + transfer(DList &other) +{ + head = other.head; + tail = other.tail; + listLen = other.listLen; + + other.abandon(); +} + +#endif + +#ifdef DOUBLELIST_VALUE + +/* Prepend a new item. Inlining this bloats the caller with new overhead. 
*/
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::
+		prepend(const T &item)
+{
+	addBefore(head, new Element(item));
+}
+
+/* Append a new item. Inlining this bloats the caller with the new overhead. */
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::
+		append(const T &item)
+{
+	addAfter(tail, new Element(item));
+}
+
+/* Add a new item after a prev element. Inlining this bloats the caller with
+ * the new overhead. */
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::
+		addAfter(Element *prev_el, const T &item)
+{
+	addAfter(prev_el, new Element(item));
+}
+
+/* Add a new item before a next element. Inlining this bloats the caller with
+ * the new overhead. */
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::
+		addBefore(Element *next_el, const T &item)
+{
+	addBefore(next_el, new Element(item));
+}
+
+#endif
+
+/*
+ * The larger iterator operators.
+ */
+
+/* Postfix ++. Advances to the next element and returns the element the
+ * iterator pointed at before the move. */
+template <DLMEL_TEMPDEF> Element *DList<DLMEL_TEMPUSE>::Iter::
+		operator++(int)
+{
+	Element *rtn = ptr;
+	ptr = ptr->BASE_EL(next);
+	return rtn;
+}
+
+/* Postfix --. Steps back to the previous element and returns the element the
+ * iterator pointed at before the move. */
+template <DLMEL_TEMPDEF> Element *DList<DLMEL_TEMPUSE>::Iter::
+		operator--(int)
+{
+	Element *rtn = ptr;
+	ptr = ptr->BASE_EL(prev);
+	return rtn;
+}
+
+/**
+ * \brief Insert an element immediately after an element in the list.
+ *
+ * If prev_el is NULL then new_el is prepended to the front of the list. If
+ * prev_el is not in the list or if new_el is already in a list, then
+ * undefined behaviour results.
+ */
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::
+		addAfter(Element *prev_el, Element *new_el)
+{
+	/* Set the previous pointer of new_el to prev_el. We do
+	 * this regardless of the state of the list. */
+	new_el->BASE_EL(prev) = prev_el;
+
+	/* Set forward pointers. */
+	if (prev_el == 0) {
+		/* There was no prev_el, we are inserting at the head. */
+		new_el->BASE_EL(next) = head;
+		head = new_el;
+	}
+	else {
+		/* There was a prev_el, we can access previous next. */
+		new_el->BASE_EL(next) = prev_el->BASE_EL(next);
+		prev_el->BASE_EL(next) = new_el;
+	}
+
+	/* Set reverse pointers. */
+	if (new_el->BASE_EL(next) == 0) {
+		/* There is no next element. Set the tail pointer. */
+		tail = new_el;
+	}
+	else {
+		/* There is a next element. Set its prev pointer. */
+		new_el->BASE_EL(next)->BASE_EL(prev) = new_el;
+	}
+
+	/* Update list length. */
+	listLen++;
+}
+
+/**
+ * \brief Insert an element immediately before an element in the list.
+ *
+ * If next_el is NULL then new_el is appended to the end of the list. If
+ * next_el is not in the list or if new_el is already in a list, then
+ * undefined behaviour results.
+ */
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::
+		addBefore(Element *next_el, Element *new_el)
+{
+	/* Set the next pointer of the new element to next_el. We do
+	 * this regardless of the state of the list. */
+	new_el->BASE_EL(next) = next_el;
+
+	/* Set reverse pointers. */
+	if (next_el == 0) {
+		/* There is no next element. We are inserting at the tail. */
+		new_el->BASE_EL(prev) = tail;
+		tail = new_el;
+	}
+	else {
+		/* There is a next element and we can access next's previous. */
+		new_el->BASE_EL(prev) = next_el->BASE_EL(prev);
+		next_el->BASE_EL(prev) = new_el;
+	}
+
+	/* Set forward pointers. */
+	if (new_el->BASE_EL(prev) == 0) {
+		/* There is no previous element. Set the head pointer.*/
+		head = new_el;
+	}
+	else {
+		/* There is a previous element, set its next pointer to new_el. */
+		new_el->BASE_EL(prev)->BASE_EL(next) = new_el;
+	}
+
+	/* Update list length. */
+	listLen++;
+}
+
+/**
+ * \brief Insert an entire list immediately after an element in this list.
+ *
+ * Elements are moved, not copied. Afterwards, the other list is empty. If
+ * prev_el is NULL then the elements are prepended to the front of the list.
+ * If prev_el is not in the list then undefined behaviour results. All
+ * elements are inserted into the list at once, so this is an O(1) operation.
+ */
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::
+		addAfter( Element *prev_el, DList<DLMEL_TEMPUSE> &dl )
+{
+	/* Do not bother if dl has no elements. */
+	if ( dl.listLen == 0 )
+		return;
+
+	/* Set the previous pointer of dl.head to prev_el. We do
+	 * this regardless of the state of the list. */
+	dl.head->BASE_EL(prev) = prev_el;
+
+	/* Set forward pointers. */
+	if (prev_el == 0) {
+		/* There was no prev_el, we are inserting at the head. */
+		dl.tail->BASE_EL(next) = head;
+		head = dl.head;
+	}
+	else {
+		/* There was a prev_el, we can access previous next. */
+		dl.tail->BASE_EL(next) = prev_el->BASE_EL(next);
+		prev_el->BASE_EL(next) = dl.head;
+	}
+
+	/* Set reverse pointers. */
+	if (dl.tail->BASE_EL(next) == 0) {
+		/* There is no next element. Set the tail pointer. */
+		tail = dl.tail;
+	}
+	else {
+		/* There is a next element. Set its prev pointer. */
+		dl.tail->BASE_EL(next)->BASE_EL(prev) = dl.tail;
+	}
+
+	/* Update the list length. */
+	listLen += dl.listLen;
+
+	/* Empty out dl. */
+	dl.head = dl.tail = 0;
+	dl.listLen = 0;
+}
+
+/**
+ * \brief Insert an entire list immediately before an element in this list.
+ *
+ * Elements are moved, not copied. Afterwards, the other list is empty. If
+ * next_el is NULL then the elements are appended to the end of the list. If
+ * next_el is not in the list then undefined behaviour results. All elements
+ * are inserted at once, so this is an O(1) operation.
+ */
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::
+		addBefore( Element *next_el, DList<DLMEL_TEMPUSE> &dl )
+{
+	/* Do not bother if dl has no elements. */
+	if ( dl.listLen == 0 )
+		return;
+
+	/* Set the next pointer of dl.tail to next_el. We do
+	 * this regardless of the state of the list. */
+	dl.tail->BASE_EL(next) = next_el;
+
+	/* Set reverse pointers. */
+	if (next_el == 0) {
+		/* There is no next element. We are inserting at the tail. */
+		dl.head->BASE_EL(prev) = tail;
+		tail = dl.tail;
+	}
+	else {
+		/* There is a next element and we can access next's previous. */
+		dl.head->BASE_EL(prev) = next_el->BASE_EL(prev);
+		next_el->BASE_EL(prev) = dl.tail;
+	}
+
+	/* Set forward pointers. */
+	if (dl.head->BASE_EL(prev) == 0) {
+		/* There is no previous element. Set the head pointer.*/
+		head = dl.head;
+	}
+	else {
+		/* There is a previous element, set its next pointer to dl.head. */
+		dl.head->BASE_EL(prev)->BASE_EL(next) = dl.head;
+	}
+
+	/* Update list length. */
+	listLen += dl.listLen;
+
+	/* Empty out dl. */
+	dl.head = dl.tail = 0;
+	dl.listLen = 0;
+}
+
+
+/**
+ * \brief Detach an element from the list.
+ *
+ * The element is not deleted. If the element is not in the list, then
+ * undefined behaviour results. The detached element's own prev and next
+ * pointers are left as they were.
+ *
+ * \returns The element detached.
+ */
+template <DLMEL_TEMPDEF> Element *DList<DLMEL_TEMPUSE>::
+		detach(Element *el)
+{
+	/* Set forward pointers to skip over el. */
+	if (el->BASE_EL(prev) == 0)
+		head = el->BASE_EL(next);
+	else {
+		el->BASE_EL(prev)->BASE_EL(next) =
+				el->BASE_EL(next);
+	}
+
+	/* Set reverse pointers to skip over el. */
+	if (el->BASE_EL(next) == 0)
+		tail = el->BASE_EL(prev);
+	else {
+		el->BASE_EL(next)->BASE_EL(prev) =
+				el->BASE_EL(prev);
+	}
+
+	/* Update List length and return element we detached. */
+	listLen--;
+	return el;
+}
+
+/**
+ * \brief Clear the list by deleting all elements.
+ *
+ * Each item in the list is deleted. The list is reset to its initial state.
+ */
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::empty()
+{
+	Element *nextToGo = 0, *cur = head;
+
+	while (cur != 0)
+	{
+		/* Read the next pointer before cur is deleted. */
+		nextToGo = cur->BASE_EL(next);
+		delete cur;
+		cur = nextToGo;
+	}
+	head = tail = 0;
+	listLen = 0;
+}
+
+/**
+ * \brief Clear the list by forgetting all elements.
+ *
+ * All elements are abandoned, not deleted. The list is reset to its initial
+ * state.
+ */
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::abandon()
+{
+	head = tail = 0;
+	listLen = 0;
+}
+
+#ifdef AAPL_NAMESPACE
+}
+#endif
diff --git a/aapl/dlist.h b/aapl/dlist.h
new file mode 100644
index 0000000..eaf3e5d
--- /dev/null
+++ b/aapl/dlist.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_DLIST_H
+#define _AAPL_DLIST_H
+
+#define BASE_EL(name) name
+#define DLMEL_TEMPDEF class Element
+#define DLMEL_TEMPUSE Element
+#define DList DList
+
+/**
+ * \addtogroup dlist
+ * @{
+ */
+
+/**
+ * \class DList
+ * \brief Basic doubly linked list.
+ *
+ * DList is the standard by-structure list type. This class requires the
+ * programmer to declare a list element type that has the necessary next and
+ * previous pointers in it. This can be achieved by inheriting from the
+ * DListEl class or by simply adding next and previous pointers directly into
+ * the list element class.
+ *
+ * DList does not assume ownership of elements in the list. If the elements
+ * are known to reside on the heap, the provided empty() routine can be used to
+ * delete all elements, however the destructor will not call this routine, it
+ * will simply abandon all the elements.
It is up to the programmer to
+ * explicitly de-allocate items when necessary.
+ *
+ * \include ex_dlist.cpp
+ */
+
+/*@}*/
+
+#include "dlcommon.h"
+
+#undef BASE_EL
+#undef DLMEL_TEMPDEF
+#undef DLMEL_TEMPUSE
+#undef DList
+
+#endif /* _AAPL_DLIST_H */
+
diff --git a/aapl/dlistmel.h b/aapl/dlistmel.h
new file mode 100644
index 0000000..3433139
--- /dev/null
+++ b/aapl/dlistmel.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_DLISTMEL_H
+#define _AAPL_DLISTMEL_H
+
+/**
+ * \addtogroup dlist
+ * @{
+ */
+
+/**
+ * \class DListMel
+ * \brief Doubly linked list for elements that may appear in multiple lists.
+ *
+ * This class is similar to DList, except that the user defined list element
+ * can inherit from multiple DListEl classes and consequently be an element in
+ * multiple lists. In other words, DListMel allows a single instance of a data
+ * structure to be an element in multiple lists without the lists interfering
+ * with one another.
+ *
+ * For each list that an element class is to appear in, the element must have
+ * unique next and previous pointers that can be unambiguously referred to with
+ * some base class name. This name is given to DListMel as a template argument
+ * so it can use the correct next and previous pointers in its list
+ * operations.
+ *
+ * DListMel does not assume ownership of elements in the list. If the elements
+ * are known to reside on the heap and are not contained in any other list or
+ * data structure, the provided empty() routine can be used to delete all
+ * elements, however the destructor will not call this routine, it will simply
+ * abandon all the elements. It is up to the programmer to explicitly
+ * de-allocate items when it is safe to do so.
+ *
+ * \include ex_dlistmel.cpp
+ */
+
+/*@}*/
+
+/* BASE_EL qualifies the next/prev pointers with the BaseEl template argument
+ * so the list uses the pointers belonging to that base class. */
+#define BASE_EL(name) BaseEl::name
+#define DLMEL_TEMPDEF class Element, class BaseEl
+#define DLMEL_TEMPUSE Element, BaseEl
+#define DList DListMel
+
+#include "dlcommon.h"
+
+#undef BASE_EL
+#undef DLMEL_TEMPDEF
+#undef DLMEL_TEMPUSE
+#undef DList
+
+#endif /* _AAPL_DLISTMEL_H */
+
diff --git a/aapl/dlistval.h b/aapl/dlistval.h
new file mode 100644
index 0000000..6f24999
--- /dev/null
+++ b/aapl/dlistval.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_DLISTVAL_H
+#define _AAPL_DLISTVAL_H
+
+/**
+ * \addtogroup dlist
+ * @{
+ */
+
+/**
+ * \class DListVal
+ * \brief By-value doubly linked list.
+ *
+ * This class is a doubly linked list that does not require a list element
+ * type to be declared. The user instead gives a type that is to be stored in
+ * the list element. When inserting a new data item, the value is copied into
+ * a newly allocated element. This list is intended to behave and be utilized
+ * like the list template found in the STL.
+ *
+ * DListVal is different from the other lists in that it allocates elements
+ * itself. The raw element insert interface is still exposed for convenience,
+ * however, the list assumes all elements in the list are allocated on the
+ * heap and are to be managed by the list. The destructor WILL delete the
+ * contents of the list. If the list is ever copied in from another list, the
+ * existing contents are deleted first. This is in contrast to DList and
+ * DListMel, which will never delete their contents to allow for statically
+ * allocated elements.
+ *
+ * \include ex_dlistval.cpp
+ */
+
+/*@}*/
+
+/* Element is a macro here rather than a template parameter, and
+ * DOUBLELIST_VALUE switches dlcommon.h to its by-value variant. */
+#define BASE_EL(name) name
+#define DLMEL_TEMPDEF class T
+#define DLMEL_TEMPUSE T
+#define DList DListVal
+#define Element DListValEl<T>
+#define DOUBLELIST_VALUE
+
+#include "dlcommon.h"
+
+#undef BASE_EL
+#undef DLMEL_TEMPDEF
+#undef DLMEL_TEMPUSE
+#undef DList
+#undef Element
+#undef DOUBLELIST_VALUE
+
+#endif /* _AAPL_DLISTVAL_H */
+
diff --git a/aapl/insertsort.h b/aapl/insertsort.h
new file mode 100644
index 0000000..eb3e264
--- /dev/null
+++ b/aapl/insertsort.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_INSERTSORT_H +#define _AAPL_INSERTSORT_H + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \addtogroup sort + * @{ + */ + +/** + * \class InsertSort + * \brief Insertion sort an array of data. + * + * InsertSort can be used to sort any array of objects of type T provided a + * compare class is given. InsertSort is in-place. It does not require any + * temporary storage. + * + * Objects are not made aware that they are being moved around in memory. + * Assignment operators, constructors and destructors are never invoked by the + * sort. + * + * InsertSort runs in O(n^2) time. It is most useful when sorting small arrays, + * where it can outperform the O(n*log(n)) sorters due to its simplicity. + * InsertSort is not a stable sort. Elements with the same key will not have + * their relative ordering preserved. + */ + +/*@}*/ + +/* InsertSort. */ +template <class T, class Compare> class InsertSort + : public Compare +{ +public: + /* Sorting interface routine. */ + void sort(T *data, long len); +}; + + +/** + * \brief Insertion sort an array of data. + */ +template <class T, class Compare> + void InsertSort<T,Compare>::sort(T *data, long len) +{ + /* For each next largest spot in the sorted array...
*/ + for ( T *dest = data; dest < data+len-1; dest++ ) { + /* Find the next smallest element in the unsorted array. */ + T *smallest = dest; + for ( T *src = dest+1; src < data+len; src++ ) { + /* If src is smaller than the current smallest, then use it. The compare routine is inherited from the dependent base class Compare, so it is called through this-> to be found by template name lookup. */ + if ( this->compare( *src, *smallest ) < 0 ) + smallest = src; + } + + if ( smallest != dest ) { + /* Swap dest, smallest. */ + char tmp[sizeof(T)]; + memcpy( tmp, dest, sizeof(T) ); + memcpy( dest, smallest, sizeof(T) ); + memcpy( smallest, tmp, sizeof(T) ); + } + } +} + +#ifdef AAPL_NAMESPACE +} +#endif + +#endif /* _AAPL_INSERTSORT_H */ diff --git a/aapl/mergesort.h b/aapl/mergesort.h new file mode 100644 index 0000000..d017511 --- /dev/null +++ b/aapl/mergesort.h @@ -0,0 +1,140 @@ +/* + * Copyright 2001, 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_MERGESORT_H +#define _AAPL_MERGESORT_H + +#include "bubblesort.h" + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \addtogroup sort + * @{ + */ + +/** + * \class MergeSort + * \brief Merge sort an array of data. + * + * MergeSort can be used to sort any array of objects of type T provided a + * compare class is given.
MergeSort is not in-place, it requires temporary + * storage equal to the size of the array. The temporary storage is allocated + * on the heap. + * + * Objects are not made aware that they are being moved around in memory. + * Assignment operators, constructors and destructors are never invoked by the + * sort. + * + * MergeSort runs in worst case O(n*log(n)) time. In most cases it is slower + * than QuickSort because more copying is necessary. But on the other hand, + * it is a stable sort, meaning that objects with the same key have their + * relative ordering preserved. Also, its worst case is better. MergeSort + * switches to a BubbleSort when the size of the array being sorted is small. + * This happens when directly sorting a small array or when MergeSort calls + * itself recursively on a small portion of a larger array. + */ + +/*@}*/ + + +/* MergeSort. */ +template <class T, class Compare> class MergeSort + : public BubbleSort<T, Compare> +{ +public: + /* Sorting interface routine. */ + void sort(T *data, long len); + +private: + /* Recursive worker. */ + void doSort(T *tmpStor, T *data, long len); +}; + +#define _MS_BUBBLE_THRESH 16 + +/* Recursive mergesort worker. Split data, make recursive calls, merge + * results. */ +template< class T, class Compare> void MergeSort<T,Compare>:: + doSort(T *tmpStor, T *data, long len) +{ + if ( len <= 1 ) + return; + + if ( len <= _MS_BUBBLE_THRESH ) { + BubbleSort<T, Compare>::sort( data, len ); + return; + } + + long mid = len / 2; + + doSort( tmpStor, data, mid ); + doSort( tmpStor + mid, data + mid, len - mid ); + + /* Merge the data. */ + T *endLower = data + mid, *lower = data; + T *endUpper = data + len, *upper = data + mid; + T *dest = tmpStor; + while ( true ) { + if ( lower == endLower ) { + /* Possibly upper left. */ + if ( upper != endUpper ) + memcpy( dest, upper, (endUpper - upper) * sizeof(T) ); + break; + } + else if ( upper == endUpper ) { + /* Only lower left.
*/ + if ( lower != endLower ) + memcpy( dest, lower, (endLower - lower) * sizeof(T) ); + break; + } + else { + /* Both upper and lower left. Taking lower on ties keeps the sort stable. The compare routine comes from a dependent base class, so it is called through this-> to be found by template name lookup. */ + if ( this->compare(*lower, *upper) <= 0 ) + memcpy( dest++, lower++, sizeof(T) ); + else + memcpy( dest++, upper++, sizeof(T) ); + } + } + + /* Copy back from the tmpStor array. */ + memcpy( data, tmpStor, sizeof( T ) * len ); +} + +/** + * \brief Merge sort an array of data. + */ +template< class T, class Compare> + void MergeSort<T,Compare>::sort(T *data, long len) +{ + /* Allocate the tmp space needed by merge sort, sort and free. */ + T *tmpStor = (T*) new char[sizeof(T) * len]; + doSort( tmpStor, data, len ); + delete[] (char*) tmpStor; +} + +#ifdef AAPL_NAMESPACE +} +#endif + +#endif /* _AAPL_MERGESORT_H */ diff --git a/aapl/quicksort.h b/aapl/quicksort.h new file mode 100644 index 0000000..9bb96ef --- /dev/null +++ b/aapl/quicksort.h @@ -0,0 +1,185 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_QUICKSORT_H +#define _AAPL_QUICKSORT_H + +#include "insertsort.h" + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \addtogroup sort + * @{ + */ + +/** + * \class QuickSort + * \brief Quick sort an array of data.
+ * + * QuickSort can be used to sort any array of objects of type T provided a + * compare class is given. QuickSort is in-place. It does not require any + * temporary storage. + * + * Objects are not made aware that they are being moved around in memory. + * Assignment operators, constructors and destructors are never invoked by the + * sort. + * + * QuickSort runs in O(n*log(n)) time in the average case. It is faster than + * mergesort in the average case because it does less moving of data. The + * performance of quicksort depends mostly on the choice of pivot. This + * implementation picks the pivot as the median of first, middle, last. This + * choice of pivot avoids the O(n^2) worst case for input already sorted, but + * it is still possible to encounter the O(n^2) worst case. For example an + * array of identical elements will run in O(n^2). + * + * QuickSort is not a stable sort. Elements with the same key will not have + * their relative ordering preserved. QuickSort switches to an InsertSort + * when the size of the array being sorted is small. This happens when + * directly sorting a small array or when QuickSort calls itself recursively + * on a small portion of a larger array. + */ + +/*@}*/ + +/* QuickSort. */ +template <class T, class Compare> class QuickSort : + public InsertSort<T, Compare> +{ +public: + /* Sorting interface routine. */ + void sort(T *data, long len); + +private: + /* Recursive worker. */ + void doSort(T *start, T *end); + T *partition(T *start, T *end); + inline T *median(T *start, T *end); +}; + +#define _QS_INSERTION_THRESH 16 + +/* Finds the median of start, middle, end. */ +template <class T, class Compare> T *QuickSort<T,Compare>:: + median(T *start, T *end) +{ + T *pivot, *mid = start + (end-start)/2; + + /* Choose the pivot. The compare routine comes from a dependent base class, so it is called through this-> to be found by template name lookup.
*/ + if ( this->compare(*start, *mid) < 0 ) { + if ( this->compare(*mid, *end) < 0 ) + pivot = mid; + else if ( this->compare(*start, *end) < 0 ) + pivot = end; + else + pivot = start; + } + else if ( this->compare(*start, *end) < 0 ) + pivot = start; + else if ( this->compare(*mid, *end) < 0 ) + pivot = end; + else + pivot = mid; + + return pivot; +} + +template <class T, class Compare> T *QuickSort<T,Compare>:: + partition(T *start, T *end) +{ + /* Use the median of start, middle, end as the pivot. First save + * it off then move the last element to the free spot. */ + char pcPivot[sizeof(T)]; + T *pivot = median(start, end); + + memcpy( pcPivot, pivot, sizeof(T) ); + if ( pivot != end ) + memcpy( pivot, end, sizeof(T) ); + + T *first = start-1; + T *last = end; + pivot = (T*) pcPivot; + + /* Shuffle elements to the correct side of the pivot, ending + * up with the free spot where the pivot will go. */ + while ( true ) { + /* Throw one element ahead to the free spot at last. */ + while ( true ) { + first += 1; + if ( first == last ) + goto done; + if ( this->compare( *first, *pivot ) > 0 ) { + memcpy(last, first, sizeof(T)); + break; + } + } + + /* Throw one element back to the free spot at first. */ + while ( true ) { + last -= 1; + if ( last == first ) + goto done; + if ( this->compare( *last, *pivot ) < 0 ) { + memcpy(first, last, sizeof(T)); + break; + } + } + } +done: + /* Put the pivot into the middle spot for it. */ + memcpy( first, pivot, sizeof(T) ); + return first; +} + + +template< class T, class Compare> void QuickSort<T,Compare>:: + doSort(T *start, T *end) +{ + long len = end - start + 1; + if ( len > _QS_INSERTION_THRESH ) { + /* Use quicksort. */ + T *pivot = partition( start, end ); + doSort(start, pivot-1); + doSort(pivot+1, end); + } + else if ( len > 1 ) { + /* Array is small, use insertion sort. */ + InsertSort<T, Compare>::sort( start, len ); + } +} + +/** + * \brief Quick sort an array of data.
+ */ +template< class T, class Compare> + void QuickSort<T,Compare>::sort(T *data, long len) +{ + /* Call recursive worker. */ + doSort(data, data+len-1); +} + +#ifdef AAPL_NAMESPACE +} +#endif + +#endif /* _AAPL_QUICKSORT_H */ diff --git a/aapl/resize.h b/aapl/resize.h new file mode 100644 index 0000000..24edc16 --- /dev/null +++ b/aapl/resize.h @@ -0,0 +1,344 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_RESIZE_H +#define _AAPL_RESIZE_H + +#include <assert.h> + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/* This step is expressed in units of T. Changing this requires changes to + * docs in ResizeLin constructor. */ +#define LIN_DEFAULT_STEP 256 + +/* + * Resizing macros giving different resize methods. + */ + +/* If needed is greater than existing, give twice needed. */ +#define EXPN_UP( existing, needed ) \ + needed > existing ? (needed<<1) : existing + +/* If needed is less than one quarter of existing, give twice needed. */ +#define EXPN_DOWN( existing, needed ) \ + needed < (existing>>2) ? (needed<<1) : existing + +/* If needed is greater than existing, give needed plus step. */ +#define LIN_UP( existing, needed ) \ + needed > existing ?
(needed+step) : existing + +/* If needed is less than existing - 2 * step then give needed plus step. */ +#define LIN_DOWN( existing, needed ) \ + needed < (existing-(step<<1)) ? (needed+step) : existing + +/* Return existing. */ +#define CONST_UP( existing, needed ) existing + +/* Return existing. */ +#define CONST_DOWN( existing, needed ) existing + +/** + * \addtogroup vector + * @{ + */ + +/** \class ResizeLin + * \brief Linear table resizer. + * + * When an up resize or a down resize is needed, ResizeLin allocates the space + * needed plus some user defined step. The result is that when growing the + * vector in a linear fashion, the number of resizes is also linear. + * + * If only up resizing is done, then there will never be more than step unused + * spaces in the vector. If down resizing is done as well, there will never be + * more than 2*step unused spaces in the vector. The up resizing and down + * resizing policies are offset to improve performance when repeatedly + * inserting and removing a small number of elements relative to the step. + * This scheme guarantees that repetitive inserting and removing of a small + * number of elements will never result in repetitive reallocation. + * + * The vectors pass sizes to the resizer in units of T, so the step gets + * interpreted as units of T. + */ + +/*@}*/ + +/* Linear resizing. */ +class ResizeLin +{ +protected: + /** + * \brief Default constructor. + * + * Initializes resize step to 256 units of the table type T. + */ + ResizeLin() : step(LIN_DEFAULT_STEP) { } + + /** + * \brief Determine the new table size when up resizing. + * + * If the existing size is insufficient for the space needed, then allocate + * the space needed plus the step. The step is in units of T. + */ + inline long upResize( long existing, long needed ) + { return LIN_UP(existing, needed); } + + /** + * \brief Determine the new table size when down resizing.
+ * + * If space needed is less than the existing - 2*step, then allocate the + * space needed plus the step. The step is in units of T. + */ + inline long downResize( long existing, long needed ) + { return LIN_DOWN(existing, needed); } + +public: + /** + * \brief Step for linear resize. + * + * Amount of extra space in units of T added each time a resize must take + * place. This may be changed at any time. The step should be >= 0. + */ + long step; +}; + +/** + * \addtogroup vector + * @{ + */ + +/** \class ResizeCtLin + * \brief Linear table resizer with compile time step. + * + * When an up resize or a down resize is needed, ResizeCtLin allocates the + * space needed plus some compile time defined step. The result is that when + * growing the vector in a linear fashion, the number of resizes is also + * linear. + * + * If only up resizing is done, then there will never be more than step unused + * spaces in the vector. If down resizing is done as well, there will never be + * more than 2*step unused spaces in the vector. The up resizing and down + * resizing policies are offset to improve performance when repeatedly + * inserting and removing a small number of elements relative to the step. + * This scheme guarantees that repetitive inserting and removing of a small + * number of elements will never result in repetitive reallocation. + * + * The vectors pass sizes to the resizer in units of T, so the step gets + * interpreted as units of T. + */ + +/*@}*/ + +/* Linear resizing. */ +template <long step> class ResizeCtLin +{ +protected: + /** + * \brief Determine the new table size when up resizing. + * + * If the existing size is insufficient for the space needed, then allocate + * the space needed plus the step. The step is in units of T. + */ + inline long upResize( long existing, long needed ) + { return LIN_UP(existing, needed); } + + /** + * \brief Determine the new table size when down resizing.
+ * + * If space needed is less than the existing - 2*step, then allocate the + * space needed plus the step. The step is in units of T. + */ + inline long downResize( long existing, long needed ) + { return LIN_DOWN(existing, needed); } +}; + +/** + * \addtogroup vector + * @{ + */ + +/** \class ResizeConst + * \brief Constant table resizer. + * + * When an up resize is needed the existing size is always used. ResizeConst + * does not allow dynamic resizing. To use ResizeConst, the vector needs to be + * constructed with an initial allocation amount otherwise it will be + * unusable. + */ + +/*@}*/ + +/* Constant table resizing. */ +class ResizeConst +{ +protected: + /* Assert that no more space is needed than exists. Return existing. */ + static inline long upResize( long existing, long needed ); + + /** + * \brief Determine the new table size when down resizing. + * + * Always returns the existing table size. + */ + static inline long downResize( long existing, long needed ) + { return CONST_DOWN(existing, needed); } +}; + +/** + * \brief Determine the new table size when up resizing. + * + * If the existing size is insufficient for the space needed, then an assertion + * will fail. Otherwise returns the existing size. + */ +inline long ResizeConst::upResize( long existing, long needed ) +{ + assert( needed <= existing ); + return CONST_UP(existing, needed); +} + +/** + * \addtogroup vector + * @{ + */ + +/** \class ResizeRunTime + * \brief Run time settable table resizer. + * + * ResizeRunTime can have its up and down resizing policies set at run time. + * Both up and down policies can be set independently to one of Exponential, + * Linear, or Constant. See the documentation for ResizeExpn, ResizeLin, and + * ResizeConst for the details of the resizing policies. + * + * The policies may be changed at any time. The default policies are + * both Exponential. + */ + +/*@}*/ + +/* Run time resizing.
*/ +class ResizeRunTime +{ +protected: + /** + * \brief Default constructor. + * + * The up and down resizing is initialized to Exponential. The step + * defaults to 256 units of T. + */ + inline ResizeRunTime(); + + /** + * \brief Resizing policies. + */ + enum ResizeType { + Exponential, /*!< Exponential resizing. */ + Linear, /*!< Linear resizing. */ + Constant /*!< Constant table size. */ + }; + + inline long upResize( long existing, long needed ); + inline long downResize( long existing, long needed ); + +public: + /** + * \brief Step for linear resize. + * + * Amount of extra space in units of T added each time a resize must take + * place. This may be changed at any time. The step should be >= 0. + */ + long step; + + /** + * \brief Up resizing policy. + */ + ResizeType upResizeType; + + /** + * \brief Down resizing policy. + */ + ResizeType downResizeType; +}; + +inline ResizeRunTime::ResizeRunTime() +: + step( LIN_DEFAULT_STEP ), + upResizeType( Exponential ), + downResizeType( Exponential ) +{ +} + +/** + * \brief Determine the new table size when up resizing. + * + * Type of up resizing is determined by upResizeType. Exponential, Linear and + * Constant resizing is the same as that of ResizeExpn, ResizeLin and + * ResizeConst. + */ +inline long ResizeRunTime::upResize( long existing, long needed ) +{ + switch ( upResizeType ) { + case Exponential: + return EXPN_UP(existing, needed); + case Linear: + return LIN_UP(existing, needed); + case Constant: + assert( needed <= existing ); + return CONST_UP(existing, needed); + } + return 0; +} + +/** + * \brief Determine the new table size when down resizing. + * + * Type of down resizing is determined by downResizeType. Exponential, Linear + * and Constant resizing is the same as that of ResizeExpn, ResizeLin and + * ResizeConst.
+ */ +inline long ResizeRunTime::downResize( long existing, long needed ) +{ + switch ( downResizeType ) { + case Exponential: + return EXPN_DOWN(existing, needed); + case Linear: + return LIN_DOWN(existing, needed); + case Constant: + return CONST_DOWN(existing, needed); + } + return 0; +} + +/* Don't need these anymore. */ +#undef EXPN_UP +#undef EXPN_DOWN +#undef LIN_UP +#undef LIN_DOWN +#undef CONST_UP +#undef CONST_DOWN + +#ifdef AAPL_NAMESPACE +} +#endif + +#endif /* _AAPL_RESIZE_H */ diff --git a/aapl/sbstmap.h b/aapl/sbstmap.h new file mode 100644 index 0000000..9436a47 --- /dev/null +++ b/aapl/sbstmap.h @@ -0,0 +1,121 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_SBSTMAP_H +#define _AAPL_SBSTMAP_H + +#include "compare.h" +#include "svector.h" + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \brief Element for SBstMap. + * + * Stores the key and value pair. + */ +template <class Key, class Value> struct SBstMapEl +{ + SBstMapEl() {} + SBstMapEl(const Key &key) : key(key) {} + SBstMapEl(const Key &key, const Value &val) : key(key), value(val) {} + + /** \brief The key */ + Key key; + + /** \brief The value.
*/ + Value value; +}; + +#ifdef AAPL_NAMESPACE +} +#endif + +/** + * \addtogroup bst + * @{ + */ + +/** + * \class SBstMap + * \brief Copy-on-write binary search table for key and value pairs. + * + * This is a map style binary search table that employs the copy-on-write + * mechanism for table data. SBstMap stores key and value pairs in each + * element. The key and value can be any type. A compare class for the key + * must be supplied. + */ + +/*@}*/ + +#define BST_TEMPL_DECLARE class Key, class Value, \ + class Compare = CmpOrd<Key>, class Resize = ResizeExpn +#define BST_TEMPL_DEF class Key, class Value, class Compare, class Resize +#define BST_TEMPL_USE Key, Value, Compare, Resize +#define GET_KEY(el) ((el).key) +#define BstTable SBstMap +#define Vector SVector +#define Table STable +#define Element SBstMapEl<Key, Value> +#define BSTMAP +#define SHARED_BST + +#include "bstcommon.h" + +#undef BST_TEMPL_DECLARE +#undef BST_TEMPL_DEF +#undef BST_TEMPL_USE +#undef GET_KEY +#undef BstTable +#undef Vector +#undef Table +#undef Element +#undef BSTMAP +#undef SHARED_BST + +/** + * \fn SBstMap::insert(const Key &key, SBstMapEl<Key, Value> **lastFound) + * \brief Insert the given key. + * + * If the given key does not already exist in the table then a new element + * having key is inserted. The key copy constructor and value default + * constructor are used to place the pair in the table. If lastFound is given, + * it is set to the new entry created. If the insert fails then lastFound is + * set to the existing pair of the same key. + * + * \returns The new element created upon success, null upon failure. + */ + +/** + * \fn SBstMap::insertMulti(const Key &key) + * \brief Insert the given key even if it exists already. + * + * If the key exists already then the new element having key is placed next + * to some other pair of the same key. InsertMulti cannot fail. The key copy + * constructor and the value default constructor are used to place the pair in + * the table.
+ * + * \returns The new element created. + */ + +#endif /* _AAPL_SBSTMAP_H */ diff --git a/aapl/sbstset.h b/aapl/sbstset.h new file mode 100644 index 0000000..fe8ddf6 --- /dev/null +++ b/aapl/sbstset.h @@ -0,0 +1,94 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_SBSTSET_H +#define _AAPL_SBSTSET_H + +/** + * \addtogroup bst + * @{ + */ + +/** + * \class SBstSet + * \brief Copy-on-write binary search table for types that are the key. + * + * This is a set style binary search table that employs the copy-on-write + * mechanism for storing table data. BstSet is suitable for types that + * comprise the entire key. Rather than look into the element to retrieve the + * key, the element is the key. A class that contains a comparison routine + * for the key must be given. 
+ */ + +/*@}*/ + +#include "compare.h" +#include "svector.h" + +#define BST_TEMPL_DECLARE class Key, class Compare = CmpOrd<Key>, \ + class Resize = ResizeExpn +#define BST_TEMPL_DEF class Key, class Compare, class Resize +#define BST_TEMPL_USE Key, Compare, Resize +#define GET_KEY(el) (el) +#define BstTable SBstSet +#define Vector SVector +#define Table STable +#define Element Key +#define BSTSET +#define SHARED_BST + +#include "bstcommon.h" + +#undef BST_TEMPL_DECLARE +#undef BST_TEMPL_DEF +#undef BST_TEMPL_USE +#undef GET_KEY +#undef BstTable +#undef Vector +#undef Table +#undef Element +#undef BSTSET +#undef SHARED_BST + +/** + * \fn SBstSet::insert(const Key &key, Key **lastFound) + * \brief Insert the given key. + * + * If the given key does not already exist in the table then it is inserted. + * The key's copy constructor is used to place the item in the table. If + * lastFound is given, it is set to the new entry created. If the insert fails + * then lastFound is set to the existing key of the same value. + * + * \returns The new element created upon success, null upon failure. + */ + +/** + * \fn SBstSet::insertMulti(const Key &key) + * \brief Insert the given key even if it exists already. + * + * If the key exists already then it is placed next to some other key of the + * same value. InsertMulti cannot fail. The key's copy constructor is used to + * place the item in the table. + * + * \returns The new element created. + */ + +#endif /* _AAPL_SBSTSET_H */ diff --git a/aapl/sbsttable.h b/aapl/sbsttable.h new file mode 100644 index 0000000..100b87e --- /dev/null +++ b/aapl/sbsttable.h @@ -0,0 +1,93 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. 
+ * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_SBSTTABLE_H +#define _AAPL_SBSTTABLE_H + +#include "compare.h" +#include "svector.h" + +/** + * \addtogroup bst + * @{ + */ + +/** + * \class SBstTable + * \brief Copy-on-write binary search table for structures that contain a key. + * + * This is a basic binary search table that employs a copy-on-write data + * storage mechanism. It can be used to contain a structure that has a key and + * possibly some data. The key should be a member of the element class and + * accessible with getKey(). A class containing the compare routine must be + * supplied. 
+ */ + +/*@}*/ + +#define BST_TEMPL_DECLARE class Element, class Key, \ + class Compare = CmpOrd<Key>, class Resize = ResizeExpn +#define BST_TEMPL_DEF class Element, class Key, class Compare, class Resize +#define BST_TEMPL_USE Element, Key, Compare, Resize +#define GET_KEY(el) ((el).getKey()) +#define BstTable SBstTable +#define Vector SVector +#define Table STable +#define BSTTABLE +#define SHARED_BST + +#include "bstcommon.h" + +#undef BST_TEMPL_DECLARE +#undef BST_TEMPL_DEF +#undef BST_TEMPL_USE +#undef GET_KEY +#undef BstTable +#undef Vector +#undef Table +#undef BSTTABLE +#undef SHARED_BST + +/** + * \fn SBstTable::insert(const Key &key, Element **lastFound) + * \brief Insert a new element with the given key. + * + * If the given key does not already exist in the table a new element is + * inserted with the given key. A constructor taking only const Key& is used + * to initialize the new element. If lastFound is given, it is set to the new + * element created. If the insert fails then lastFound is set to the existing + * element with the same key. + * + * \returns The new element created upon success, null upon failure. + */ + +/** + * \fn SBstTable::insertMulti(const Key &key) + * \brief Insert a new element even if the key exists already. + * + * If the key exists already then the new element is placed next to some + * element with the same key. InsertMulti cannot fail. A constructor taking + * only const Key& is used to initialize the new element. + * + * \returns The new element created. + */ + +#endif /* _AAPL_SBSTTABLE_H */ diff --git a/aapl/svector.h b/aapl/svector.h new file mode 100644 index 0000000..ff9e97c --- /dev/null +++ b/aapl/svector.h @@ -0,0 +1,1426 @@ +/* + * Copyright 2002, 2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. 
+ * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_SVECTOR_H +#define _AAPL_SVECTOR_H + +#include <new> +#include <string.h> +#include <stdlib.h> +#include <assert.h> +#include "table.h" + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \addtogroup vector + * @{ + */ + +/** \class SVector + * \brief Copy-on-write dynamic array. + * + * SVector is a variant of Vector that employs copy-on-write behaviour. The + * SVector copy constructor and = operator make shallow copies. If a vector + * that references shared data is modified with insert, replace, append, + * prepend, setAs or remove, a new copy is made so as not to interfere with + * the shared data. However, shared individual elements may be modified by + * bypassing the SVector interface. + * + * SVector is a dynamic array that can be used to contain complex data + * structures that have constructors and destructors as well as simple types + * such as integers and pointers. + * + * SVector supports inserting, overwriting, and removing single or multiple + * elements at once. Constructors and destructors are called wherever + * appropriate. For example, before an element is overwritten, its + * destructor is called.
+ * + * SVector provides automatic resizing of allocated memory as needed and + * offers different allocation schemes for controlling how the automatic + * allocation is done. Two senses of the the length of the data is + * maintained: the amount of raw memory allocated to the vector and the number + * of actual elements in the vector. The various allocation schemes control + * how the allocated space is changed in relation to the number of elements in + * the vector. + */ + +/*@}*/ + +/* SVector */ +template < class T, class Resize = ResizeExpn > class SVector : + public STable<T>, public Resize +{ +private: + typedef STable<T> BaseTable; + +public: + /** + * \brief Initialize an empty vector with no space allocated. + * + * If a linear resizer is used, the step defaults to 256 units of T. For a + * runtime vector both up and down allocation schemes default to + * Exponential. + */ + SVector() { } + + /** + * \brief Create a vector that contains an initial element. + * + * The vector becomes one element in length. The element's copy + * constructor is used to place the value in the vector. + */ + SVector(const T &val) { setAs(&val, 1); } + + /** + * \brief Create a vector that contains an array of elements. + * + * The vector becomes len elements in length. Copy constructors are used + * to place the new elements in the vector. + */ + SVector(const T *val, long len) { setAs(val, len); } + + /* Shallow copy. */ + SVector( const SVector &v ); + + /* Shallow copy. */ + SVector(STabHead *head); + + /** + * \brief Free all memory used by the vector. + * + * The vector is reset to zero elements. Destructors are called on all + * elements in the vector. The space allocated for the vector is freed. + */ + ~SVector() { empty(); } + + /* Delete all items. */ + void empty(); + + /** + * \brief Deep copy another vector into this vector. + * + * Copies the entire contents of the other vector into this vector. Any + * existing contents are first deleted. Equivalent to setAs. 
+ */ + void deepCopy( const SVector &v ) { setAs(v.data, v.length()); } + + /* Perform a shallow copy of another vector. */ + SVector &operator=( const SVector &v ); + + /* Perform a shallow copy of another vector by the header. */ + SVector &operator=( STabHead *head ); + + + /*@{*/ + /** + * \brief Insert one element at position pos. + * + * Elements in the vector from pos onward are shifted one space to the + * right. The copy constructor is used to place the element into this + * vector. If pos is greater than the length of the vector then undefined + * behaviour results. If pos is negative then it is treated as an offset + * relative to the length of the vector. + */ + void insert(long pos, const T &val) { insert(pos, &val, 1); } + + /* Insert an array of values. */ + void insert(long pos, const T *val, long len); + + /** + * \brief Insert all the elements from another vector at position pos. + * + * Elements in this vector from pos onward are shifted v.length() spaces + * to the right. The element's copy constructor is used to copy the items + * into this vector. The other vector is left unchanged. If pos is off the + * end of the vector, then undefined behaviour results. If pos is negative + * then it is treated as an offset relative to the length of the vector. + * Equivalent to vector.insert(pos, other.data, other.length()). + */ + void insert(long pos, const SVector &v) { insert(pos, v.data, v.length()); } + + /* Insert len copies of val into the vector. */ + void insertDup(long pos, const T &val, long len); + + /** + * \brief Insert one new element using the default constructor. + * + * Elements in the vector from pos onward are shifted one space to the right. + * The default constructor is used to init the new element. If pos is greater + * than the length of the vector then undefined behaviour results. If pos is + * negative then it is treated as an offset relative to the length of the + * vector. 
+ */ + void insertNew(long pos) { insertNew(pos, 1); } + + /* Insert len new items using default constructor. */ + void insertNew(long pos, long len); + /*@}*/ + + /*@{*/ + /** + * \brief Remove one element at position pos. + * + * The element's destructor is called. Elements to the right of pos are + * shifted one space to the left to take up the free space. If pos is greater + * than or equal to the length of the vector then undefined behavior results. + * If pos is negative then it is treated as an offset relative to the length + * of the vector. + */ + void remove(long pos) { remove(pos, 1); } + + /* Delete a number of elements. */ + void remove(long pos, long len); + /*@}*/ + + /*@{*/ + /** + * \brief Replace one element at position pos. + * + * If there is an existing element at position pos (if pos is less than the + * length of the vector) then its destructor is called before the space is + * used. The copy constructor is used to place the element into the vector. + * If pos is greater than the length of the vector then undefined behaviour + * results. If pos is negative then it is treated as an offset relative to + * the length of the vector. + */ + void replace(long pos, const T &val) { replace(pos, &val, 1); } + + /* Replace with an array of values. */ + void replace(long pos, const T *val, long len); + + /** + * \brief Replace at position pos with all the elements of another vector. + * + * Replace at position pos with all the elements of another vector. The other + * vector is left unchanged. If there are existing elements at the positions + * to be replaced, then destructors are called before the space is used. Copy + * constructors are used to place the elements into this vector. It is + * allowable for the pos and length of the other vector to specify a + * replacement that overwrites existing elements and creates new ones. If pos + * is greater than the length of the vector then undefined behaviour results. 
+ * If pos is negative, then it is treated as an offset relative to the length + * of the vector. + */ + void replace(long pos, const SVector &v) { replace(pos, v.data, v.length()); } + + /* Replace len items with len copies of val. */ + void replaceDup(long pos, const T &val, long len); + + /** + * \brief Replace at position pos with one new element. + * + * If there is an existing element at the position to be replaced (pos is + * less than the length of the vector) then the element's destructor is + * called before the space is used. The default constructor is used to + * initialize the new element. If pos is greater than the length of the + * vector then undefined behaviour results. If pos is negative, then it is + * treated as an offset relative to the length of the vector. + */ + void replaceNew(long pos) { replaceNew(pos, 1); } + + /* Replace len items at pos with newly constructed objects. */ + void replaceNew(long pos, long len); + /*@}*/ + + /*@{*/ + + /** + * \brief Set the contents of the vector to be val exactly. + * + * The vector becomes one element in length. Destructors are called on any + * existing elements in the vector. The element's copy constructor is used to + * place the val in the vector. + */ + void setAs(const T &val) { setAs(&val, 1); } + + /* Set to the contents of an array. */ + void setAs(const T *val, long len); + + /** + * \brief Set the vector to exactly the contents of another vector. + * + * The vector becomes v.length() elements in length. Destructors are called + * on any existing elements. Copy constructors are used to place the new + * elements in the vector. + */ + void setAs(const SVector &v) { setAs(v.data, v.length()); } + + /* Set as len copies of item. */ + void setAsDup(const T &item, long len); + + /** + * \brief Set the vector to exactly one new item. + * + * The vector becomes one element in length. Destructors are called on any + * existing elements in the vector. 
The default constructor is used to + * init the new item. + */ + void setAsNew() { setAsNew(1); } + + /* Set as newly constructed objects using the default constructor. */ + void setAsNew(long len); + /*@}*/ + + /*@{*/ + /** + * \brief Append one element to the end of the vector. + * + * Copy constructor is used to place the element in the vector. + */ + void append(const T &val) { replace(BaseTable::length(), &val, 1); } + + /** + * \brief Append len elements to the end of the vector. + * + * Copy constructors are used to place the elements in the vector. + */ + void append(const T *val, long len) { replace(BaseTable::length(), val, len); } + + /** + * \brief Append the contents of another vector. + * + * The other vector is left unchanged. Copy constructors are used to place + * the elements in the vector. + * + * NOTE(review): appending a vector to itself passes a pointer into this + * vector's own storage, which replace may reallocate before reading + * from it -- confirm callers never do this. + */ + void append(const SVector &v) + { replace(BaseTable::length(), v.data, v.length()); } + + /** + * \brief Append len copies of item. + * + * The copy constructor is used to place the item in the vector. + */ + void appendDup(const T &item, long len) { replaceDup(BaseTable::length(), item, len); } + + /** + * \brief Append a single newly created item. + * + * The new element is initialized with the default constructor. + */ + void appendNew() { replaceNew(BaseTable::length(), 1); } + + /** + * \brief Append len newly created items. + * + * The new elements are initialized with the default constructor. + */ + void appendNew(long len) { replaceNew(BaseTable::length(), len); } + /*@}*/ + + + /*@{*/ + /** + * \brief Prepend one element to the front of the vector. + * + * Copy constructor is used to place the element in the vector. + */ + void prepend(const T &val) { insert(0, &val, 1); } + + /** + * \brief Prepend len elements to the front of the vector. 
+ * + * Copy constructors are used to place the elements in the vector. + */ + void prepend(const T *val, long len) { insert(0, val, len); } + + /** + * \brief Prepend the contents of another vector. 
+ * + * The other vector is left unchanged. Copy constructors are used to place + * the elements in the vector. + */ + void prepend(const SVector &v) { insert(0, v.data, v.length()); } + + /** + * \brief Prepend len copies of item. + * + * The copy constructor is used to place the item in the vector. + */ + void prependDup(const T &item, long len) { insertDup(0, item, len); } + + /** + * \brief Prepend a single newly created item. + * + * The new element is initialized with the default constructor. + */ + void prependNew() { insertNew(0, 1); } + + /** + * \brief Prepend len newly created items. + * + * The new elements are initialized with the default constructor. + */ + void prependNew(long len) { insertNew(0, len); } + /*@}*/ + + /* Convenience access. operator[] does no bounds checking and, although + * const, returns a mutable reference into the (possibly shared) data. */ + T &operator[](int i) const { return BaseTable::data[i]; } + long size() const { return BaseTable::length(); } + + /* Various classes for setting the iterator */ + struct Iter; + struct IterFirst { IterFirst( const SVector &v ) : v(v) { } const SVector &v; }; + struct IterLast { IterLast( const SVector &v ) : v(v) { } const SVector &v; }; + struct IterNext { IterNext( const Iter &i ) : i(i) { } const Iter &i; }; + struct IterPrev { IterPrev( const Iter &i ) : i(i) { } const Iter &i; }; + + /** + * \brief Shared Vector Iterator. + * \ingroup iterators + */ + struct Iter + { + /* Default construct: all three pointers are null. */ + Iter() : ptr(0), ptrBeg(0), ptrEnd(0) { } + + /* Construct. */ + Iter( const SVector &v ); + Iter( const IterFirst &vf ); + Iter( const IterLast &vl ); + inline Iter( const IterNext &vn ); + inline Iter( const IterPrev &vp ); + + /* Assign. 
*/ + Iter &operator=( const SVector &v ); + Iter &operator=( const IterFirst &vf ); + Iter &operator=( const IterLast &vl ); + inline Iter &operator=( const IterNext &vf ); + inline Iter &operator=( const IterPrev &vl ); + + /** \brief Less than end? */ + bool lte() const { return ptr != ptrEnd; } + + /** \brief At end? 
*/ + bool end() const { return ptr == ptrEnd; } + + /** \brief Greater than beginning? */ + bool gtb() const { return ptr != ptrBeg; } + + /** \brief At beginning? */ + bool beg() const { return ptr == ptrBeg; } + + /** \brief At first element? */ + bool first() const { return ptr == ptrBeg+1; } + + /** \brief At last element? */ + bool last() const { return ptr == ptrEnd-1; } + + /* Return the zero-based position of the current element. ptrBeg points + * one slot before the first element, hence the extra -1. */ + long pos() const { return ptr - ptrBeg - 1; } + T &operator[](int i) const { return ptr[i]; } + + /** \brief Implicit cast to T*. */ + operator T*() const { return ptr; } + + /** \brief Dereference operator returns T&. */ + T &operator *() const { return *ptr; } + + /** \brief Arrow operator returns T*. */ + T *operator->() const { return ptr; } + + /** \brief Move to next item. */ + T *operator++() { return ++ptr; } + + /** \brief Move to next item. Returns the pointer held before the move. */ + T *operator++(int) { return ptr++; } + + /** \brief Move to next item. */ + T *increment() { return ++ptr; } + + /** \brief Move to previous item. */ + T *operator--() { return --ptr; } + + /** \brief Move to previous item. Returns the pointer held before the move. */ + T *operator--(int) { return ptr--; } + + /** \brief Move to previous item. */ + T *decrement() { return --ptr; } + + /** \brief Return the next item. Does not modify this. */ + inline IterNext next() const { return IterNext(*this); } + + /** \brief Return the previous item. Does not modify this. */ + inline IterPrev prev() const { return IterPrev(*this); } + + /** \brief The iterator is simply a pointer. */ + T *ptr; + + /* For testing endpoints: one before the first element and one past the + * last. */ + T *ptrBeg, *ptrEnd; + }; + + /** \brief Return an IterFirst used to start an Iter at the first element. */ + IterFirst first() { return IterFirst( *this ); } + + /** \brief Return last element. 
*/ + IterLast last() { return IterLast( *this ); } + +protected: + void makeRawSpaceFor(long pos, long len); + + void setAsCommon(long len); + long replaceCommon(long pos, long len); + long insertCommon(long pos, long len); + + void upResize(long len); + void upResizeDup(long len); + void upResizeFromEmpty(long len); + void downResize(long len); + void downResizeDup(long len); +}; + +#if 0 +/* Create a vector with an initial number of elements and size. Compiled out: + * the class above declares no matching SVector(long, long) constructor. */ +template <class T, class Resize> SVector<T, Resize>:: + SVector( long size, long allocLen ) +{ + /* Allocate the space if we are given a positive allocLen. */ + if ( allocLen > 0 ) { + /* Allocate the data needed. */ + STabHead *head = (STabHead*) malloc( sizeof(STabHead) + + sizeof(T) * allocLen ); + if ( head == 0 ) + throw std::bad_alloc(); + + /* Set up the header and save the data pointer. */ + head->refCount = 1; + head->allocLen = allocLen; + head->tabLen = 0; + BaseTable::data = (T*) (head + 1); + } + + /* Grow to the size specified. If we did not have enough space + * allocated that is ok. Table will be grown to the right size. */ + setAsNew( size ); +} +#endif + +/** + * \brief Perform a shallow copy of the vector. + * + * Takes a reference to the contents of the other vector: the shared header's + * refcount is incremented and no elements are copied. + */ +template <class T, class Resize> SVector<T, Resize>:: + SVector(const SVector<T, Resize> &v) +{ + /* Take a reference to other, if any data is allocated. */ + if ( v.data == 0 ) + BaseTable::data = 0; + else { + /* Get the source header, up the refcount and ref it. */ + STabHead *srcHead = ((STabHead*) v.data) - 1; + srcHead->refCount += 1; + BaseTable::data = (T*) (srcHead + 1); + } +} + +#if 0 +/** + * \brief Perform a shallow copy of the vector from only the header. + * + * Takes a reference to the contents specified by the header. 
+ */ +template <class T, class Resize> SVector<T, Resize>:: + SVector(STabHead *head) +{ + /* Take a reference to other, if the header is no-null. 
*/ + if ( head == 0 ) + BaseTable::data = 0; + else { + head->refCount += 1; + BaseTable::data = (T*) (head + 1); + } +} +#endif + + +/** + * \brief Shallow copy another vector into this vector. + * + * Takes a reference to the other vector's data and releases this vector's + * reference to its previous contents (destroying them if this was the last + * reference). Assigning a vector to itself leaves it unchanged. + * + * \param v The vector to share data with. + * \returns A reference to this. + */ +template <class T, class Resize> SVector<T, Resize> & + SVector<T, Resize>:: operator=( const SVector &v ) +{ + /* Take the new reference before releasing the old one. If v is this + * vector, emptying first would drop our own reference (destroying the + * elements when it was the last one) and leave the vector empty. */ + T *newData = 0; + if ( v.data != 0 ) { + /* Get the source header, up the refcount and ref it. */ + STabHead *srcHead = ((STabHead*) v.data) - 1; + srcHead->refCount += 1; + newData = (T*) (srcHead + 1); + } + + /* Now release the current contents and adopt the new data. */ + empty(); + BaseTable::data = newData; + return *this; +} + +/** + * \brief Shallow copy another vector into this vector from only the header. + * + * Takes a reference to the data that follows the given header and releases + * this vector's reference to its previous contents. Passing the header this + * vector already uses leaves it unchanged. + * + * \param head Header of the data to share, or null to become empty. + * \returns A reference to this. + */ +template <class T, class Resize> SVector<T, Resize> & + SVector<T, Resize>::operator=( STabHead *head ) +{ + /* Take the new reference before releasing the old one. If head is the + * header this vector already uses, emptying first could free it and the + * refcount update would then write to freed memory. */ + if ( head != 0 ) + head->refCount += 1; + + /* Release the current contents. 
*/ + empty(); + + /* Point at the data that follows the header, if the header is non-null. */ + if ( head == 0 ) + BaseTable::data = 0; + else + BaseTable::data = (T*) (head + 1); + return *this; +} + +/* Init a vector iterator with just a vector. */ +template <class T, class Resize> SVector<T, Resize>:: + Iter::Iter( const SVector &v ) +{ + long length; + if ( v.data == 0 || (length=(((STabHead*)v.data)-1)->tabLen) == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = v.data; + ptrBeg = v.data-1; + ptrEnd = v.data+length; + } +} + +/* Init a vector iterator with the first of a vector. 
*/ +template <class T, class Resize> SVector<T, Resize>:: + Iter::Iter( const IterFirst &vf ) +{ + long length; + if ( vf.v.data == 0 || (length=(((STabHead*)vf.v.data)-1)->tabLen) == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = vf.v.data; + ptrBeg = vf.v.data-1; + ptrEnd = vf.v.data+length; + } +} + +/* Init a vector iterator at the last element of a vector. For a vector with + * no data or zero length all three pointers are null; otherwise ptrBeg is one + * before the first element and ptrEnd is one past the last. */ +template <class T, class Resize> SVector<T, Resize>:: + Iter::Iter( const IterLast &vl ) +{ + long length; + if ( vl.v.data == 0 || (length=(((STabHead*)vl.v.data)-1)->tabLen) == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = vl.v.data+length-1; + ptrBeg = vl.v.data-1; + ptrEnd = vl.v.data+length; + } +} + +/* Init a vector iterator one element after another iterator's position, + * sharing that iterator's endpoints. */ +template <class T, class Resize> SVector<T, Resize>:: + Iter::Iter( const IterNext &vn ) +: + ptr(vn.i.ptr+1), + ptrBeg(vn.i.ptrBeg), + ptrEnd(vn.i.ptrEnd) +{ +} + +/* Init a vector iterator one element before another iterator's position, + * sharing that iterator's endpoints. */ +template <class T, class Resize> SVector<T, Resize>:: + Iter::Iter( const IterPrev &vp ) +: + ptr(vp.i.ptr-1), + ptrBeg(vp.i.ptrBeg), + ptrEnd(vp.i.ptrEnd) +{ +} + +/* Set a vector iterator to the first element of some vector (all pointers + * null when the vector has no data or zero length). */ +template <class T, class Resize> typename SVector<T, Resize>::Iter & + SVector<T, Resize>::Iter::operator=( const SVector &v ) +{ + long length; + if ( v.data == 0 || (length=(((STabHead*)v.data)-1)->tabLen) == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = v.data; + ptrBeg = v.data-1; + ptrEnd = v.data+length; + } + return *this; +} + +/* Set a vector iterator with the first element in a vector. 
*/ +template <class T, class Resize> typename SVector<T, Resize>::Iter & + SVector<T, Resize>::Iter::operator=( const IterFirst &vf ) +{ + long length; + if ( vf.v.data == 0 || (length=(((STabHead*)vf.v.data)-1)->tabLen) == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = vf.v.data; + ptrBeg = vf.v.data-1; + ptrEnd = vf.v.data+length; + } + return *this; +} + +/* Set a vector iterator with the last element in a vector. */ +template <class T, class Resize> typename SVector<T, Resize>::Iter & + SVector<T, Resize>::Iter::operator=( const IterLast &vl ) +{ + long length; + if ( vl.v.data == 0 || (length=(((STabHead*)vl.v.data)-1)->tabLen) == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = vl.v.data+length-1; + ptrBeg = vl.v.data-1; + ptrEnd = vl.v.data+length; + } + return *this; +} + +/* Set a vector iterator with the next of some other iterator. */ +template <class T, class Resize> typename SVector<T, Resize>::Iter & + SVector<T, Resize>::Iter::operator=( const IterNext &vn ) +{ + ptr = vn.i.ptr+1; + ptrBeg = vn.i.ptrBeg; + ptrEnd = vn.i.ptrEnd; + return *this; +} + +/* Set a vector iterator with the prev of some other iterator. */ +template <class T, class Resize> typename SVector<T, Resize>::Iter & + SVector<T, Resize>::Iter::operator=( const IterPrev &vp ) +{ + ptr = vp.i.ptr-1; + ptrBeg = vp.i.ptrBeg; + ptrEnd = vp.i.ptrEnd; + return *this; +} + +/* Up resize the data for len elements using Resize::upResize to tell us the + * new length. Reads and writes allocLen. Does not read or write length. + * Assumes that there is some data allocated already. Throws std::bad_alloc + * if the reallocation fails, in which case the vector is left unchanged. */ +template <class T, class Resize> void SVector<T, Resize>:: + upResize(long len) +{ + /* Get the current header. */ + STabHead *head = ((STabHead*)BaseTable::data) - 1; + + /* Ask the resizer what the new length will be. */ + long newLen = Resize::upResize(head->allocLen, len); + + /* Did the data grow? 
*/ + if ( newLen > head->allocLen ) { + /* Table exists already, resize it up. 
Keep the old header until + * realloc succeeds: on failure the old block is still valid and its + * allocLen must keep describing it. */ + STabHead *newHead = (STabHead*) realloc( head, sizeof(STabHead) + + sizeof(T) * newLen ); + if ( newHead == 0 ) + throw std::bad_alloc(); + + /* Record the new allocation and save the data pointer. */ + newHead->allocLen = newLen; + BaseTable::data = (T*) (newHead + 1); + } +} + +/* Allocates a new buffer for an up resize that requires a duplication of the + * data. Uses Resize::upResize to get the allocation length. Reads and writes + * allocLen. This upResize does write the new length. Assumes that there is + * some data allocated already. Does not copy any elements; the caller does + * that from the old data. Throws std::bad_alloc if the allocation fails, in + * which case the vector and the shared refcount are left unchanged. */ +template <class T, class Resize> void SVector<T, Resize>:: + upResizeDup(long len) +{ + /* Get the current header. */ + STabHead *head = ((STabHead*)BaseTable::data) - 1; + + /* Ask the resizer what the new length will be. */ + long newLen = Resize::upResize(head->allocLen, len); + + /* Allocate a separate buffer for our private copy. */ + STabHead *newHead = (STabHead*) malloc( sizeof(STabHead) + sizeof(T) * newLen ); + if ( newHead == 0 ) + throw std::bad_alloc(); + + /* Only now that nothing can fail, drop our reference to the shared data. + * Doing it before the allocation would leave the refcount one too low if + * malloc failed while this vector still pointed at the shared data. */ + head->refCount -= 1; + + newHead->refCount = 1; + newHead->allocLen = newLen; + newHead->tabLen = len; + + /* Save the data pointer. */ + BaseTable::data = (T*) (newHead + 1); +} + +/* Up resize the data for len elements using Resize::upResize to tell us the + * new length. Reads and writes allocLen. This upresize DOES write length. + * Assumes that no data is allocated. 
*/ +template <class T, class Resize> void SVector<T, Resize>:: + upResizeFromEmpty(long len) +{ + /* There is no table yet. If the len is zero, then there is no need to + * create a table. */ + if ( len > 0 ) { + /* Ask the resizer what the new length will be. */ + long newLen = Resize::upResize(0, len); + + /* If len is greater than zero then we are always allocating the table. */ + STabHead *head = (STabHead*) malloc( sizeof(STabHead) + + sizeof(T) * newLen ); + if ( head == 0 ) + throw std::bad_alloc(); + + /* Set up the header and save the data pointer. Note that we set the + * length here. This differs from the other upResizes. 
*/ + head->refCount = 1; + head->allocLen = newLen; + head->tabLen = len; + BaseTable::data = (T*) (head + 1); + } +} + +/* Down resize the data for len elements using Resize::downResize to determine + * the new length. Reads and writes allocLen. Does not read or write length. + * Frees the table entirely when the resizer asks for zero. Throws + * std::bad_alloc if the reallocation fails, in which case the vector is left + * unchanged. */ +template <class T, class Resize> void SVector<T, Resize>:: + downResize(long len) +{ + /* If there is already no length, then there is nothing we can do. */ + if ( BaseTable::data != 0 ) { + /* Get the current header. */ + STabHead *head = ((STabHead*)BaseTable::data) - 1; + + /* Ask the resizer what the new length will be. */ + long newLen = Resize::downResize( head->allocLen, len ); + + /* Did the data shrink? */ + if ( newLen < head->allocLen ) { + if ( newLen == 0 ) { + /* Simply free the data. */ + free( head ); + BaseTable::data = 0; + } + else { + /* Not shrinking to size zero, realloc it to the smaller size. + * Keep the old header until realloc succeeds so that a failure + * leaves allocLen describing the block we still own. */ + STabHead *newHead = (STabHead*) realloc( head, sizeof(STabHead) + + sizeof(T) * newLen ); + if ( newHead == 0 ) + throw std::bad_alloc(); + + /* Save the new allocated length and the new data ptr. */ + newHead->allocLen = newLen; + BaseTable::data = (T*) (newHead + 1); + } + } + } +} + +/* Allocate a new buffer for a down resize and duplication of the array. The + * new array will be len long and allocation size will be determined using + * Resize::downResize with the old array's allocLen. Does not actually copy + * any data. Reads and writes allocLen and writes the new len. Throws + * std::bad_alloc if the allocation fails, in which case the vector and the + * shared refcount are left unchanged. 
*/ +template <class T, class Resize> void SVector<T, Resize>:: + downResizeDup(long len) +{ + /* If there is already no length, then there is nothing we can do. */ + if ( BaseTable::data != 0 ) { + /* Get the current header. */ + STabHead *head = ((STabHead*)BaseTable::data) - 1; + + /* Ask the resizer what the new length will be. */ + long newLen = Resize::downResize( head->allocLen, len ); + + /* Allocate the new buffer first. 
The refcount on the existing head is + * only dropped once this has succeeded, so a failed malloc cannot + * leave the shared refcount one too low. */ + STabHead *newHead = (STabHead*) malloc( sizeof(STabHead) + sizeof(T) * newLen ); + if ( newHead == 0 ) + throw std::bad_alloc(); + + /* Detach from the existing head. */ + head->refCount -= 1; + + /* Set up the new header. */ + newHead->refCount = 1; + newHead->allocLen = newLen; + newHead->tabLen = len; + + /* Save the data pointer. */ + BaseTable::data = (T*) (newHead + 1); + } +} + +/** + * \brief Free all memory used by the vector. + * + * The vector is reset to zero elements. Destructors are called on all + * elements in the vector. The space allocated for the vector is freed. + */ +template <class T, class Resize> void SVector<T, Resize>:: + empty() +{ + if ( BaseTable::data != 0 ) { + /* Get the header and drop the refcount on the data. */ + STabHead *head = ((STabHead*) BaseTable::data) - 1; + head->refCount -= 1; + + /* If the refcount just went down to zero nobody else is referencing + * the data. */ + if ( head->refCount == 0 ) { + /* Call All destructors. */ + T *pos = BaseTable::data; + for ( long i = 0; i < head->tabLen; pos++, i++ ) + pos->~T(); + + /* Free the data space. */ + free( head ); + } + + /* Clear the pointer. */ + BaseTable::data = 0; + } +} + +/* Prepare for setting the contents of the vector to some array len long. + * Handles reusing the existing space, detaching from a common space or + * growing from zero length automatically. */ +template <class T, class Resize> void SVector<T, Resize>:: + setAsCommon(long len) +{ + if ( BaseTable::data != 0 ) { + /* Get the header. */ + STabHead *head = ((STabHead*)BaseTable::data) - 1; + + /* If the refCount is one, then we can reuse the space. Otherwise we + * must detach from the referenced data create new space. */ + if ( head->refCount == 1 ) { + /* Call All destructors. 
*/ + T *pos = BaseTable::data; + for ( long i = 0; i < head->tabLen; pos++, i++ ) + pos->~T(); + + /* Adjust the allocated length. 
*/ + if ( len < head->tabLen ) + downResize( len ); + else if ( len > head->tabLen ) + upResize( len ); + + if ( BaseTable::data != 0 ) { + /* Get the header again and set the length. */ + head = ((STabHead*)BaseTable::data) - 1; + head->tabLen = len; + } + } + else { + /* Just detach from the data. The shared elements are not destroyed + * here; only our reference is dropped. */ + head->refCount -= 1; + BaseTable::data = 0; + + /* Make enough space. This will set the length. */ + upResizeFromEmpty( len ); + } + } + else { + /* The table is currently empty. Make enough space. This will set the + * length. */ + upResizeFromEmpty( len ); + } +} + +/** + * \brief Set the contents of the vector to be len elements exactly. + * + * The vector becomes len elements in length. Destructors are called on any + * existing elements in the vector. Copy constructors are used to place the + * new elements in the vector. + * + * NOTE(review): val must not point into this vector's own storage. When the + * data is unshared, setAsCommon destroys the existing elements (and may + * resize the buffer) before they are copied from val. + */ +template <class T, class Resize> void SVector<T, Resize>:: + setAs(const T *val, long len) +{ + /* Common stuff for setting the array to len long. */ + setAsCommon( len ); + + /* Copy-construct each element in place from val. */ + T *dst = BaseTable::data; + const T *src = val; + for ( long i = 0; i < len; i++, dst++, src++ ) + new(dst) T(*src); +} + + +/** + * \brief Set the vector to len copies of item. + * + * The vector becomes len elements in length. Destructors are called on any + * existing elements in the vector. The element's copy constructor is used to + * copy the item into the vector. + */ +template <class T, class Resize> void SVector<T, Resize>:: + setAsDup(const T &item, long len) +{ + /* Do the common stuff for setting the array to len long. */ + setAsCommon( len ); + + /* Copy item in one spot at a time. 
*/ + T *dst = BaseTable::data; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(item); +} + +/** + * \brief Set the vector to exactly len new items. + * + * The vector becomes len elements in length. Destructors are called on any + * existing elements in the vector. Default constructors are used to init the + * new items. 
+ */ +template <class T, class Resize> void SVector<T, Resize>:: + setAsNew(long len) +{ + /* Do the common stuff for setting the array to len long. */ + setAsCommon( len ); + + /* Create items using default constructor. */ + T *dst = BaseTable::data; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(); +} + +/* Make space in vector for a replacement at pos of len items. Handles reusing + * existing space, detaching or growing from zero space. Returns pos, with a + * negative offset converted to an index when the vector has data. */ +template <class T, class Resize> long SVector<T, Resize>:: + replaceCommon(long pos, long len) +{ + if ( BaseTable::data != 0 ) { + /* Get the header. */ + STabHead *head = ((STabHead*)BaseTable::data) - 1; + + /* If we are given a negative position to replace at then treat it as + * a position relative to the length. This doesn't have any meaning + * unless the length is at least one. */ + if ( pos < 0 ) + pos = head->tabLen + pos; + + /* The end is the one past the last item that we want to write to. */ + long i, endPos = pos + len; + + if ( head->refCount == 1 ) { + /* We can reuse the space. Make sure we have enough space. */ + if ( endPos > head->tabLen ) { + upResize( endPos ); + + /* Get the header again, whose addr may have changed after + * resizing. */ + head = ((STabHead*)BaseTable::data) - 1; + + /* Delete any objects we need to delete. */ + T *item = BaseTable::data + pos; + for ( i = pos; i < head->tabLen; i++, item++ ) + item->~T(); + + /* We are extending the vector, set the new data length. */ + head->tabLen = endPos; + } + else { + /* Delete any objects we need to delete. */ + T *item = BaseTable::data + pos; + for ( i = pos; i < endPos; i++, item++ ) + item->~T(); + } + } + else { + /* Use endPos to calc the end of the vector. */ + long newLen = endPos; + if ( newLen < head->tabLen ) + newLen = head->tabLen; + + /* Duplicate and grow up to endPos. This will set the length. 
*/ + upResizeDup( newLen ); + + /* Copy from src up to pos. 
*/ + const T *src = (T*) (head + 1); + T *dst = BaseTable::data; + for ( i = 0; i < pos; i++, dst++, src++) + new(dst) T(*src); + + /* Copy any items after the replace range. head still refers to the + * old shared header, so src reads the original elements. */ + for ( i += len, src += len, dst += len; + i < head->tabLen; i++, dst++, src++ ) + new(dst) T(*src); + } + } + else { + /* There is no data initially, must grow from zero. This will set the + * new length. NOTE(review): the length becomes len regardless of pos + * and pos is returned unadjusted, so only pos == 0 looks meaningful + * on this path. */ + upResizeFromEmpty( len ); + } + + return pos; +} + + +/** + * \brief Replace len elements at position pos. + * + * If there are existing elements at the positions to be replaced, then + * destructors are called before the space is used. Copy constructors are used + * to place the elements into the vector. It is allowable for the pos and + * length to specify a replacement that overwrites existing elements and + * creates new ones. If pos is greater than the length of the vector then + * undefined behaviour results. If pos is negative, then it is treated as an + * offset relative to the length of the vector. + */ +template <class T, class Resize> void SVector<T, Resize>:: + replace(long pos, const T *val, long len) +{ + /* Common work for replacing in the vector. Returns the resolved pos. */ + pos = replaceCommon( pos, len ); + + /* Copy data in using copy constructor. */ + T *dst = BaseTable::data + pos; + const T *src = val; + for ( long i = 0; i < len; i++, dst++, src++ ) + new(dst) T(*src); +} + +/** + * \brief Replace at position pos with len copies of an item. + * + * If there are existing elements at the positions to be replaced, then + * destructors are called before the space is used. The copy constructor is + * used to place the element into this vector. 
It is allowable for the pos and + * length to specify a replacement that overwrites existing elements and + * creates new ones. If pos is greater than the length of the vector then + * undefined behaviour results. If pos is negative, then it is treated as an + * offset relative to the length of the vector. 
+ */ +template <class T, class Resize> void SVector<T, Resize>:: + replaceDup(long pos, const T &val, long len) +{ + /* Common replacement stuff. */ + pos = replaceCommon( pos, len ); + + /* Copy data in using copy constructor. */ + T *dst = BaseTable::data + pos; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(val); +} + +/** + * \brief Replace at position pos with len new elements. + * + * If there are existing elements at the positions to be replaced, then + * destructors are called before the space is used. The default constructor is + * used to initialize the new elements. It is allowable for the pos and length + * to specify a replacement that overwrites existing elements and creates new + * ones. If pos is greater than the length of the vector then undefined + * behaviour results. If pos is negative, then it is treated as an offset + * relative to the length of the vector. + */ +template <class T, class Resize> void SVector<T, Resize>:: + replaceNew(long pos, long len) +{ + /* Do the common replacement stuff. */ + pos = replaceCommon( pos, len ); + + /* Construct the new items in place using the default constructor. */ + T *dst = BaseTable::data + pos; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(); +} + +/** + * \brief Remove len elements at position pos. + * + * Destructor is called on all elements removed. Elements to the right of pos + * are shifted len spaces to the left to take up the free space. If pos is + * greater than or equal to the length of the vector then undefined behavior + * results. If pos is negative then it is treated as an offset relative to the + * length of the vector. + */ +template <class T, class Resize> void SVector<T, Resize>:: + remove(long pos, long len) +{ + /* If there is no data, we can't delete anything anyways. */ + if ( BaseTable::data != 0 ) { + /* Get the header. 
*/ + STabHead *head = ((STabHead*)BaseTable::data) - 1; + + /* If we are given a negative position to remove at then + * treat it as a position relative to the length. 
*/ + if ( pos < 0 ) + pos = head->tabLen + pos; + + /* The first position after the last item deleted. */ + long endPos = pos + len; + + /* The New data length. */ + long i, newLen = head->tabLen - len; + + if ( head->refCount == 1 ) { + /* We are the only ones using the data. We can reuse + * the existing space. */ + + /* The place in the data we are deleting at. */ + T *dst = BaseTable::data + pos; + + /* Call Destructors. */ + T *item = BaseTable::data + pos; + for ( i = 0; i < len; i += 1, item += 1 ) + item->~T(); + + /* Shift data over if necessary. */ + long lenToSlideOver = head->tabLen - endPos; + if ( len > 0 && lenToSlideOver > 0 ) + memmove(BaseTable::data + pos, dst + len, sizeof(T)*lenToSlideOver); + + /* Shrink the data if necessary. */ + downResize( newLen ); + + if ( BaseTable::data != 0 ) { + /* Get the header again (because of the resize) and set the + * new data length. */ + head = ((STabHead*)BaseTable::data) - 1; + head->tabLen = newLen; + } + } + else { + /* Must detach from the common data. Just copy the non-deleted + * items from the common data. */ + + /* Duplicate and grow down to newLen. This will set the length. */ + downResizeDup( newLen ); + + /* Copy over just the non-deleted parts. */ + const T *src = (T*) (head + 1); + T *dst = BaseTable::data; + for ( i = 0; i < pos; i++, dst++, src++ ) + new(dst) T(*src); + + /* ... and the second half. */ + for ( i += len, src += len; i < head->tabLen; i++, src++, dst++ ) + new(dst) T(*src); + } + } +} + +/* Shift over existing data. Handles reusing existing space, detaching or + * growing from zero space. */ +template <class T, class Resize> long SVector<T, Resize>:: + insertCommon(long pos, long len) +{ + if ( BaseTable::data != 0 ) { + /* Get the header. */ + STabHead *head = ((STabHead*)BaseTable::data) - 1; + + /* If we are given a negative position to insert at then treat it as a + * position relative to the length. This only has meaning if there is + * existing data. 
*/ + if ( pos < 0 ) + pos = head->tabLen + pos; + + /* Calculate the new length. */ + long i, newLen = head->tabLen + len; + + if ( head->refCount == 1 ) { + /* Up resize, we are growing. */ + upResize( newLen ); + + /* Get the header again, (the addr may have changed after + * resizing). */ + head = ((STabHead*)BaseTable::data) - 1; + + /* Shift over data at insert spot if needed. */ + if ( len > 0 && pos < head->tabLen ) { + memmove( BaseTable::data + pos + len, BaseTable::data + pos, + sizeof(T)*(head->tabLen - pos) ); + } + + /* Grow the length by the len inserted. */ + head->tabLen += len; + } + else { + /* Need to detach from the existing array. Copy over the other + * parts. This will set the length. */ + upResizeDup( newLen ); + + /* Copy over the parts around the insert. */ + const T *src = (T*) (head + 1); + T *dst = BaseTable::data; + for ( i = 0; i < pos; i++, dst++, src++ ) + new(dst) T(*src); + + /* ... and the second half. */ + for ( dst += len; i < head->tabLen; i++, src++, dst++ ) + new(dst) T(*src); + } + } + else { + /* There is no existing data. Start from zero. This will set the + * length. */ + upResizeFromEmpty( len ); + } + + return pos; +} + + +/** + * \brief Insert len elements at position pos. + * + * Elements in the vector from pos onward are shifted len spaces to the right. + * The copy constructor is used to place the elements into this vector. If pos + * is greater than the length of the vector then undefined behaviour results. + * If pos is negative then it is treated as an offset relative to the length + * of the vector. + */ +template <class T, class Resize> void SVector<T, Resize>:: + insert(long pos, const T *val, long len) +{ + /* Do the common insertion stuff. */ + pos = insertCommon( pos, len ); + + /* Copy data in element by element. */ + T *dst = BaseTable::data + pos; + const T *src = val; + for ( long i = 0; i < len; i++, dst++, src++ ) + new(dst) T(*src); +} + +/** + * \brief Insert len copies of item at position pos. 
+ * + * Elements in the vector from pos onward are shifted len spaces to the right. + * The copy constructor is used to place the element into this vector. If pos + * is greater than the length of the vector then undefined behaviour results. + * If pos is negative then it is treated as an offset relative to the length + * of the vector. + */ +template <class T, class Resize> void SVector<T, Resize>:: + insertDup(long pos, const T &item, long len) +{ + /* Do the common insertion stuff. */ + pos = insertCommon( pos, len ); + + /* Copy the data item in one at a time. */ + T *dst = BaseTable::data + pos; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(item); +} + + +/** + * \brief Insert len new elements using the default constructor. + * + * Elements in the vector from pos onward are shifted len spaces to the right. + * Default constructors are used to init the new elements. If pos is off the + * end of the vector then undefined behaviour results. If pos is negative then + * it is treated as an offset relative to the length of the vector. + */ +template <class T, class Resize> void SVector<T, Resize>:: + insertNew(long pos, long len) +{ + /* Do the common insertion stuff. */ + pos = insertCommon( pos, len ); + + /* Init new data with default constructors. */ + T *dst = BaseTable::data + pos; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(); +} + +/* Makes space for len items, Does not init the items in any way. If pos is + * greater than the length of the vector then undefined behaviour results. + * Updates the length of the vector. */ +template <class T, class Resize> void SVector<T, Resize>:: + makeRawSpaceFor(long pos, long len) +{ + if ( BaseTable::data != 0 ) { + /* Get the header. */ + STabHead *head = ((STabHead*)BaseTable::data) - 1; + + /* Calculate the new length. */ + long i, newLen = head->tabLen + len; + + if ( head->refCount == 1 ) { + /* Up resize, we are growing. 
*/ + upResize( newLen ); + + /* Get the header again, (the addr may have changed after + * resizing). */ + head = ((STabHead*)BaseTable::data) - 1; + + /* Shift over data at insert spot if needed. */ + if ( len > 0 && pos < head->tabLen ) { + memmove( BaseTable::data + pos + len, BaseTable::data + pos, + sizeof(T)*(head->tabLen - pos) ); + } + + /* Grow the length by the len inserted. */ + head->tabLen += len; + } + else { + /* Need to detach from the existing array. Copy over the other + * parts. This will set the length. */ + upResizeDup( newLen ); + + /* Copy over the parts around the insert. */ + const T *src = (T*) (head + 1); + T *dst = BaseTable::data; + for ( i = 0; i < pos; i++, dst++, src++ ) + new(dst) T(*src); + + /* ... and the second half. */ + for ( dst += len; i < head->tabLen; i++, src++, dst++ ) + new(dst) T(*src); + } + } + else { + /* There is no existing data. Start from zero. This will set the + * length. */ + upResizeFromEmpty( len ); + } +} + + +#ifdef AAPL_NAMESPACE +} +#endif + + +#endif /* _AAPL_SVECTOR_H */ diff --git a/aapl/table.h b/aapl/table.h new file mode 100644 index 0000000..c1f2b7b --- /dev/null +++ b/aapl/table.h @@ -0,0 +1,252 @@ +/* + * Copyright 2001, 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. 
 + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_TABLE_H +#define _AAPL_TABLE_H + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \addtogroup vector + * @{ + */ + +/** \class Table + * \brief Base class for dynamic arrays. + * + * Table is used as the common data storage class for vectors. It does not + * provide any methods to operate on the data and as such it is not intended + * to be used directly. It exists so that algorithms that operate on dynamic + * arrays can be written without knowing about the various vector classes that + * may exist. + */ + +/*@}*/ + +/* Table class. */ +template <class T> class Table +{ +public: + /* Default Constructor. */ + inline Table(); + + /** + * \brief Get the length of the vector. + * + * \returns the length of the vector. + */ + long length() const + { return tabLen; } + + /** + * \brief Table data. + * + * The pointer to the elements in the vector. Modifying the vector may + * cause this pointer to change. + */ + T *data; + + /** + * \brief Table length. + * + * The number of items of type T in the table. + */ + long tabLen; + + /** + * \brief Allocated length. + * + * The number of items for which there is room in the current allocation. + */ + long allocLen; +}; + +/** + * \brief Default constructor + * + * Initialize table data to empty. + */ +template <class T> inline Table<T>::Table() +: + data(0), + tabLen(0), + allocLen(0) +{ +} + +/* Default shared table header class. */ +struct STabHead +{ + /** + * \brief Table length. + * + * The number of items of type T in the table. + */ + long tabLen; + + /** + * \brief Allocated length. + * + * The number of items for which there is room in the current allocation. + */ + long allocLen; + + /** + * \brief Ref Count. + * + * The number of shared vectors referencing this data. 
+ */ + long refCount; +}; + +/** + * \addtogroup vector + * @{ + */ + +/** \class STable + * \brief Base class for implicitly shared dynamic arrays. + * + * STable is used as the common data storage class for shared vectors. It does + * not provide any methods to operate on the data and as such it is not + * intended to be used directly. It exists so that algorithms that operatate + * on dynamic arrays can be written without knowing about the various shared + * vector classes that my exist. + */ + +/*@}*/ + +/* STable class. */ +template <class T> class STable +{ +public: + /* Default Constructor. */ + inline STable(); + + /** + * \brief Get the length of the shared vector. + * + * \returns the length of the shared vector. + */ + long length() const + { return data == 0 ? 0 : (((STabHead*)data) - 1)->tabLen; } + + /** + * \brief Get header of the shared vector. + * + * \returns the header of the shared vector. + */ + STabHead *header() const + { return data == 0 ? 0 : (((STabHead*)data) - 1); } + + /** + * \brief Table data. + * + * The pointer to the elements in the vector. The shared table header is + * located just behind the data. Modifying the vector may cause this + * pointer to change. + */ + T *data; +}; + +/** + * \brief Default constructor + * + * Initialize shared table data to empty. + */ +template <class T> inline STable<T>::STable() +: + data(0) +{ +} + +/* If needed is greater than existing, give twice needed. */ +#define EXPN_UP( existing, needed ) \ + needed > existing ? (needed<<1) : existing + +/* If needed is less than 1 quarter existing, give twice needed. */ +#define EXPN_DOWN( existing, needed ) \ + needed < (existing>>2) ? (needed<<1) : existing + +/** + * \addtogroup vector + * @{ + */ + +/** \class ResizeExpn + * \brief Exponential table resizer. + * + * ResizeExpn is the default table resizer. When an up resize is needed, space + * is doubled. When a down resize is needed, space is halved. 
The result is + * that when growing the vector in a linear fashion, the number of resizes of + * the allocated space behaves logarithmically. + * + * If only up resizes are done, there will never be more than 2 times the + * needed space allocated. If down resizes are done as well, there will never + * be more than 4 times the needed space allocated. ResizeExpn uses this 50% + * usage policy on up resizing and 25% usage policy on down resizing to + * improve performance when repeatedly inserting and removing a small number + * of elements relative to the size of the array. This scheme guarantees that + * repetitive inserting and removing of a small number of elements will never + * result in repetative reallocation. + * + * The sizes passed to the resizer from the vectors are in units of T. + */ + +/*@}*/ + +/* Exponential resizer. */ +class ResizeExpn +{ +protected: + /** + * \brief Determine the new table size when up resizing. + * + * If the existing size is insufficient for the space needed then allocate + * twice the space needed. Otherwise use the existing size. + * + * \returns The new table size. + */ + static inline long upResize( long existing, long needed ) + { return EXPN_UP( existing, needed ); } + + /** + * \brief Determine the new table size when down resizing. + * + * If the space needed is less than one quarter of the existing size then + * allocate twice the space needed. Otherwise use the exitsing size. + * + * \returns The new table size. + */ + static inline long downResize( long existing, long needed ) + { return EXPN_DOWN( existing, needed ); } +}; + +#undef EXPN_UP +#undef EXPN_DOWN + +#ifdef AAPL_NAMESPACE +} +#endif + +#endif /* _AAPL_TABLE_H */ diff --git a/aapl/vector.h b/aapl/vector.h new file mode 100644 index 0000000..c33e35b --- /dev/null +++ b/aapl/vector.h @@ -0,0 +1,1202 @@ +/* + * Copyright 2002, 2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Aapl. 
 + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_VECTOR_H +#define _AAPL_VECTOR_H + +#include <new> +#include <string.h> +#include <stdlib.h> +#include <assert.h> +#include "table.h" + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \addtogroup vector + * @{ + */ + +/** \class Vector + * \brief Dynamic array. + * + * This is a typical vector implementation. It is a dynamic array that can be + * used to contain complex data structures that have constructors and + * destructors as well as simple types such as integers and pointers. + * + * Vector supports inserting, overwriting, and removing single or multiple + * elements at once. Constructors and destructors are called wherever + * appropriate. For example, before an element is overwritten, its + * destructor is called. + * + * Vector provides automatic resizing of allocated memory as needed and offers + * different allocation schemes for controlling how the automatic allocation + * is done. Two senses of the length of the data are maintained: the + * amount of raw memory allocated to the vector and the number of actual + * elements in the vector. The various allocation schemes control how the + * allocated space is changed in relation to the number of elements in the + * vector. 
 + * + * \include ex_vector.cpp + */ + +/*@}*/ + +template < class T, class Resize = ResizeExpn > class Vector + : public Table<T>, public Resize +{ +private: + typedef Table<T> BaseTable; + +public: + /** + * \brief Initialize an empty vector with no space allocated. + * + * If a linear resizer is used, the step defaults to 256 units of T. For a + * runtime vector both up and down allocation schemes default to + * Exponential. + */ + Vector() { } + + /** + * \brief Create a vector that contains an initial element. + * + * The vector becomes one element in length. The element's copy + * constructor is used to place the value in the vector. + */ + Vector(const T &val) { setAs(&val, 1); } + + /** + * \brief Create a vector that contains an array of elements. + * + * The vector becomes len elements in length. Copy constructors are used + * to place the new elements in the vector. + */ + Vector(const T *val, long len) { setAs(val, len); } + + /* Deep copy. */ + Vector( const Vector &v ); + + /* Free all mem used by the vector. */ + ~Vector() { empty(); } + + /* Delete all items. */ + void empty(); + + /* Abandon the contents of the vector without deleting. */ + void abandon(); + + /* Performs a shallow copy of another vector into this vector. If this + * vector is non-empty then its contents are lost (not freed). */ + void shallowCopy( const Vector &v ); + + /* Perform a deep copy of another vector into this vector. */ + Vector &operator=( const Vector &v ); + + + /*@{*/ + /** + * \brief Insert one element at position pos. + * + * Elements in the vector from pos onward are shifted one space to the + * right. The copy constructor is used to place the element into this + * vector. If pos is greater than the length of the vector then undefined + * behaviour results. If pos is negative then it is treated as an offset + * relative to the length of the vector. + */ + void insert(long pos, const T &val) { insert(pos, &val, 1); } + + /* Insert an array of values. 
*/ + void insert(long pos, const T *val, long len); + + /** + * \brief Insert all the elements from another vector at position pos. + * + * Elements in this vector from pos onward are shifted v.tabLen spaces to + * the right. The element's copy constructor is used to copy the items + * into this vector. The other vector is left unchanged. If pos is off the + * end of the vector, then undefined behaviour results. If pos is negative + * then it is treated as an offset relative to the length of the vector. + * Equivalent to vector.insert(pos, other.data, other.tabLen). + */ + void insert(long pos, const Vector &v) { insert(pos, v.data, v.tabLen); } + + /* Insert len copies of val into the vector. */ + void insertDup(long pos, const T &val, long len); + + /** + * \brief Insert one new element using the default constructor. + * + * Elements in the vector from pos onward are shifted one space to the + * right. The default constructor is used to init the new element. If pos + * is greater than the length of the vector then undefined behaviour + * results. If pos is negative then it is treated as an offset relative to + * the length of the vector. + */ + void insertNew(long pos) { insertNew(pos, 1); } + + /* Insert len new items using default constructor. */ + void insertNew(long pos, long len); + /*@}*/ + + /*@{*/ + /** + * \brief Remove one element at position pos. + * + * The element's destructor is called. Elements to the right of pos are + * shifted one space to the left to take up the free space. If pos is greater + * than or equal to the length of the vector then undefined behavior results. + * If pos is negative then it is treated as an offset relative to the length + * of the vector. + */ + void remove(long pos) { remove(pos, 1); } + + /* Delete a number of elements. */ + void remove(long pos, long len); + /*@}*/ + + /*@{*/ + /** + * \brief Replace one element at position pos. 
+ * + * If there is an existing element at position pos (if pos is less than + * the length of the vector) then its destructor is called before the + * space is used. The copy constructor is used to place the element into + * the vector. If pos is greater than the length of the vector then + * undefined behaviour results. If pos is negative then it is treated as + * an offset relative to the length of the vector. + */ + void replace(long pos, const T &val) { replace(pos, &val, 1); } + + /* Replace with an array of values. */ + void replace(long pos, const T *val, long len); + + /** + * \brief Replace at position pos with all the elements of another vector. + * + * Replace at position pos with all the elements of another vector. The + * other vector is left unchanged. If there are existing elements at the + * positions to be replaced, then destructors are called before the space + * is used. Copy constructors are used to place the elements into this + * vector. It is allowable for the pos and length of the other vector to + * specify a replacement that overwrites existing elements and creates new + * ones. If pos is greater than the length of the vector then undefined + * behaviour results. If pos is negative, then it is treated as an offset + * relative to the length of the vector. + */ + void replace(long pos, const Vector &v) { replace(pos, v.data, v.tabLen); } + + /* Replace len items with len copies of val. */ + void replaceDup(long pos, const T &val, long len); + + /** + * \brief Replace at position pos with one new element. + * + * If there is an existing element at the position to be replaced (pos is + * less than the length of the vector) then the element's destructor is + * called before the space is used. The default constructor is used to + * initialize the new element. If pos is greater than the length of the + * vector then undefined behaviour results. If pos is negative, then it is + * treated as an offset relative to the length of the vector. 
 + */ + void replaceNew(long pos) { replaceNew(pos, 1); } + + /* Replace len items at pos with newly constructed objects. */ + void replaceNew(long pos, long len); + /*@}*/ + + /*@{*/ + /** + * \brief Set the contents of the vector to be val exactly. + * + * The vector becomes one element in length. Destructors are called on any + * existing elements in the vector. The element's copy constructor is used + * to place the val in the vector. + */ + void setAs(const T &val) { setAs(&val, 1); } + + /* Set to the contents of an array. */ + void setAs(const T *val, long len); + + /** + * \brief Set the vector to exactly the contents of another vector. + * + * The vector becomes v.tabLen elements in length. Destructors are called + * on any existing elements. Copy constructors are used to place the new + * elements in the vector. + */ + void setAs(const Vector &v) { setAs(v.data, v.tabLen); } + + /* Set as len copies of item. */ + void setAsDup(const T &item, long len); + + /** + * \brief Set the vector to exactly one new item. + * + * The vector becomes one element in length. Destructors are called on any + * existing elements in the vector. The default constructor is used to + * init the new item. + */ + void setAsNew() { setAsNew(1); } + + /* Set as newly constructed objects using the default constructor. */ + void setAsNew(long len); + /*@}*/ + + /*@{*/ + /** + * \brief Append one element to the end of the vector. + * + * Copy constructor is used to place the element in the vector. + */ + void append(const T &val) { replace(BaseTable::tabLen, &val, 1); } + + /** + * \brief Append len elements to the end of the vector. + * + * Copy constructors are used to place the elements in the vector. + */ + void append(const T *val, long len) { replace(BaseTable::tabLen, val, len); } + + /** + * \brief Append the contents of another vector. + * + * The other vector is left unchanged. Copy constructors are used to place the + * elements in the vector. 
 + */ + void append(const Vector &v) { replace(BaseTable::tabLen, v.data, v.tabLen); } + + /** + * \brief Append len copies of item. + * + * The copy constructor is used to place the item in the vector. + */ + void appendDup(const T &item, long len) { replaceDup(BaseTable::tabLen, item, len); } + + /** + * \brief Append a single newly created item. + * + * The new element is initialized with the default constructor. + */ + void appendNew() { replaceNew(BaseTable::tabLen, 1); } + + /** + * \brief Append len newly created items. + * + * The new elements are initialized with the default constructor. + */ + void appendNew(long len) { replaceNew(BaseTable::tabLen, len); } + /*@}*/ + + /*@{*/ + /** \fn Vector::prepend(const T &val) + * \brief Prepend one element to the front of the vector. + * + * Copy constructor is used to place the element in the vector. + */ + void prepend(const T &val) { insert(0, &val, 1); } + + /** + * \brief Prepend len elements to the front of the vector. + * + * Copy constructors are used to place the elements in the vector. + */ + void prepend(const T *val, long len) { insert(0, val, len); } + + /** + * \brief Prepend the contents of another vector. + * + * The other vector is left unchanged. Copy constructors are used to place the + * elements in the vector. + */ + void prepend(const Vector &v) { insert(0, v.data, v.tabLen); } + + /** + * \brief Prepend len copies of item. + * + * The copy constructor is used to place the item in the vector. + */ + void prependDup(const T &item, long len) { insertDup(0, item, len); } + + /** + * \brief Prepend a single newly created item. + * + * The new element is initialized with the default constructor. + */ + void prependNew() { insertNew(0, 1); } + + /** + * \brief Prepend len newly created items. + * + * The new elements are initialized with the default constructor. + */ + void prependNew(long len) { insertNew(0, len); } + /*@}*/ + + /* Convenience access. 
*/ + T &operator[](int i) const { return BaseTable::data[i]; } + long size() const { return BaseTable::tabLen; } + + /* Forward this so a ref can be used. */ + struct Iter; + + /* Various classes for setting the iterator */ + struct IterFirst { IterFirst( const Vector &v ) : v(v) { } const Vector &v; }; + struct IterLast { IterLast( const Vector &v ) : v(v) { } const Vector &v; }; + struct IterNext { IterNext( const Iter &i ) : i(i) { } const Iter &i; }; + struct IterPrev { IterPrev( const Iter &i ) : i(i) { } const Iter &i; }; + + /** + * \brief Vector Iterator. + * \ingroup iterators + */ + struct Iter + { + /* Construct, assign. */ + Iter() : ptr(0), ptrBeg(0), ptrEnd(0) { } + + /* Construct. */ + Iter( const Vector &v ); + Iter( const IterFirst &vf ); + Iter( const IterLast &vl ); + inline Iter( const IterNext &vn ); + inline Iter( const IterPrev &vp ); + + /* Assign. */ + Iter &operator=( const Vector &v ); + Iter &operator=( const IterFirst &vf ); + Iter &operator=( const IterLast &vl ); + inline Iter &operator=( const IterNext &vf ); + inline Iter &operator=( const IterPrev &vl ); + + /** \brief Less than end? */ + bool lte() const { return ptr != ptrEnd; } + + /** \brief At end? */ + bool end() const { return ptr == ptrEnd; } + + /** \brief Greater than beginning? */ + bool gtb() const { return ptr != ptrBeg; } + + /** \brief At beginning? */ + bool beg() const { return ptr == ptrBeg; } + + /** \brief At first element? */ + bool first() const { return ptr == ptrBeg+1; } + + /** \brief At last element? */ + bool last() const { return ptr == ptrEnd-1; } + + /* Return the position. */ + long pos() const { return ptr - ptrBeg - 1; } + T &operator[](int i) const { return ptr[i]; } + + /** \brief Implicit cast to T*. */ + operator T*() const { return ptr; } + + /** \brief Dereference operator returns T&. */ + T &operator *() const { return *ptr; } + + /** \brief Arrow operator returns T*. 
*/ + T *operator->() const { return ptr; } + + /** \brief Move to next item. */ + T *operator++() { return ++ptr; } + + /** \brief Move to next item. */ + T *operator++(int) { return ptr++; } + + /** \brief Move to next item. */ + T *increment() { return ++ptr; } + + /** \brief Move n items forward. */ + T *operator+=(long n) { return ptr+=n; } + + /** \brief Move to previous item. */ + T *operator--() { return --ptr; } + + /** \brief Move to previous item. */ + T *operator--(int) { return ptr--; } + + /** \brief Move to previous item. */ + T *decrement() { return --ptr; } + + /** \brief Move n items back. */ + T *operator-=(long n) { return ptr-=n; } + + /** \brief Return the next item. Does not modify this. */ + inline IterNext next() const { return IterNext(*this); } + + /** \brief Return the previous item. Does not modify this. */ + inline IterPrev prev() const { return IterPrev(*this); } + + /** \brief The iterator is simply a pointer. */ + T *ptr; + + /* For testing endpoints. */ + T *ptrBeg, *ptrEnd; + }; + + /** \brief Return first element. */ + IterFirst first() { return IterFirst( *this ); } + + /** \brief Return last element. */ + IterLast last() { return IterLast( *this ); } + +protected: + void makeRawSpaceFor(long pos, long len); + + void upResize(long len); + void downResize(long len); +}; + +#if 0 +/* Create a vector with an initial number of elements and size. */ +template<class T, class Resize> Vector<T, Resize>:: + Vector( long size, long allocLen ) +{ + /* Allocate the space if we are given a positive allocLen. */ + BaseTable::allocLen = allocLen; + if ( allocLen > 0 ) { + BaseTable::data = (T*) malloc(sizeof(T) * BaseTable::allocLen); + if ( BaseTable::data == 0 ) + throw std::bad_alloc(); + } + + /* Grow to the size specified. If we did not have enough space + * allocated that is ok. Table will be grown to right size. */ + setAsNew( size ); +} +#endif + +/* Init a vector iterator with just a vector. 
*/ +template <class T, class Resize> Vector<T, Resize>::Iter::Iter( const Vector &v ) +{ + if ( v.tabLen == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = v.data; + ptrBeg = v.data-1; + ptrEnd = v.data+v.tabLen; + } +} + +/* Init a vector iterator with the first of a vector. */ +template <class T, class Resize> Vector<T, Resize>::Iter::Iter( + const IterFirst &vf ) +{ + if ( vf.v.tabLen == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = vf.v.data; + ptrBeg = vf.v.data-1; + ptrEnd = vf.v.data+vf.v.tabLen; + } +} + +/* Init a vector iterator with the last of a vector. */ +template <class T, class Resize> Vector<T, Resize>::Iter::Iter( + const IterLast &vl ) +{ + if ( vl.v.tabLen == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = vl.v.data+vl.v.tabLen-1; + ptrBeg = vl.v.data-1; + ptrEnd = vl.v.data+vl.v.tabLen; + } +} + +/* Init a vector iterator with the next of some other iterator. */ +template <class T, class Resize> Vector<T, Resize>::Iter::Iter( + const IterNext &vn ) +: + ptr(vn.i.ptr+1), + ptrBeg(vn.i.ptrBeg), + ptrEnd(vn.i.ptrEnd) +{ +} + +/* Init a vector iterator with the prev of some other iterator. */ +template <class T, class Resize> Vector<T, Resize>::Iter::Iter( + const IterPrev &vp ) +: + ptr(vp.i.ptr-1), + ptrBeg(vp.i.ptrBeg), + ptrEnd(vp.i.ptrEnd) +{ +} + +/* Set a vector iterator with some vector. */ +template <class T, class Resize> typename Vector<T, Resize>::Iter & + Vector<T, Resize>::Iter::operator=( const Vector &v ) +{ + if ( v.tabLen == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = v.data; + ptrBeg = v.data-1; + ptrEnd = v.data+v.tabLen; + } + return *this; +} + +/* Set a vector iterator with the first element in a vector. 
*/ +template <class T, class Resize> typename Vector<T, Resize>::Iter & + Vector<T, Resize>::Iter::operator=( const IterFirst &vf ) +{ + if ( vf.v.tabLen == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = vf.v.data; + ptrBeg = vf.v.data-1; + ptrEnd = vf.v.data+vf.v.tabLen; + } + return *this; +} + +/* Set a vector iterator with the last element in a vector. */ +template <class T, class Resize> typename Vector<T, Resize>::Iter & + Vector<T, Resize>::Iter::operator=( const IterLast &vl ) +{ + if ( vl.v.tabLen == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = vl.v.data+vl.v.tabLen-1; + ptrBeg = vl.v.data-1; + ptrEnd = vl.v.data+vl.v.tabLen; + } + return *this; +} + +/* Set a vector iterator with the next of some other iterator. */ +template <class T, class Resize> typename Vector<T, Resize>::Iter & + Vector<T, Resize>::Iter::operator=( const IterNext &vn ) +{ + ptr = vn.i.ptr+1; + ptrBeg = vn.i.ptrBeg; + ptrEnd = vn.i.ptrEnd; + return *this; +} + +/* Set a vector iterator with the prev of some other iterator. */ +template <class T, class Resize> typename Vector<T, Resize>::Iter & + Vector<T, Resize>::Iter::operator=( const IterPrev &vp ) +{ + ptr = vp.i.ptr-1; + ptrBeg = vp.i.ptrBeg; + ptrEnd = vp.i.ptrEnd; + return *this; +} + +/** + * \brief Forget all elements in the vector. + * + * The contents of the vector are reset to null without the space + * being freed. + */ +template<class T, class Resize> void Vector<T, Resize>:: + abandon() +{ + BaseTable::data = 0; + BaseTable::tabLen = 0; + BaseTable::allocLen = 0; +} + +/** + * \brief Shallow copy another vector into this vector. + * + * The dynamic array of the other vector is copied into this vector by + * reference. If this vector is non-empty then its contents are lost. This + * routine must be used with care. After a shallow copy one vector should + * abandon its contents to prevent both destructors from attempting to free + * the common array. 
+ */ +template<class T, class Resize> void Vector<T, Resize>:: + shallowCopy( const Vector &v ) +{ + BaseTable::data = v.data; + BaseTable::tabLen = v.tabLen; + BaseTable::allocLen = v.allocLen; +} + +/** + * \brief Deep copy another vector into this vector. + * + * Copies the entire contents of the other vector into this vector. Any + * existing contents are first deleted. Equivalent to setAs, except that + * assigning a vector to itself leaves it untouched. + * + * \returns A reference to this. + */ +template<class T, class Resize> Vector<T, Resize> &Vector<T, Resize>:: + operator=( const Vector &v ) +{ + /* setAs destructs our elements before copying from the source, so a + * self-assignment would copy from already destructed elements. */ + if ( this != &v ) + setAs(v.data, v.tabLen); + return *this; +} + +/* Up resize the data for len elements using Resize::upResize to tell us the + * new allocLen. Reads and writes allocLen. Does not read or write tabLen. + * Throws std::bad_alloc if the allocation fails, in which case data and + * allocLen are left as they were. */ +template<class T, class Resize> void Vector<T, Resize>:: + upResize(long len) +{ + /* Ask the resizer what the new allocLen will be. */ + long newLen = Resize::upResize(BaseTable::allocLen, len); + + /* Did the data grow? */ + if ( newLen > BaseTable::allocLen ) { + /* realloc on a null pointer acts as malloc, so one call covers both + * the first allocation and growing an existing table. The result is + * held in a temporary because a failed realloc leaves the old block + * allocated and we must not lose the only pointer to it. */ + T *newData = (T*) realloc( BaseTable::data, sizeof(T) * newLen ); + if ( newData == 0 ) + throw std::bad_alloc(); + BaseTable::data = newData; + BaseTable::allocLen = newLen; + } +} + +/* Down resize the data for len elements using Resize::downResize to determine + * the new allocLen. Reads and writes allocLen. Does not read or write tabLen. */ +template<class T, class Resize> void Vector<T, Resize>:: + downResize(long len) +{ + /* Ask the resizer what the new allocLen will be. */ + long newLen = Resize::downResize( BaseTable::allocLen, len ); + + /* Did the data shrink? */ + if ( newLen < BaseTable::allocLen ) { + BaseTable::allocLen = newLen; + if ( newLen == 0 ) { + /* Simply free the data. 
*/ + free( BaseTable::data ); + BaseTable::data = 0; + } + else { + /* Not shrinking to size zero, realloc it to the smaller size. */ + BaseTable::data = (T*) realloc( BaseTable::data, sizeof(T) * newLen ); + if ( BaseTable::data == 0 ) + throw std::bad_alloc(); + } + } +} + +/** + * \brief Perform a deep copy of the vector. + * + * The contents of the other vector are copied into this vector. This vector + * gets the same allocation size as the other vector. All items are copied + * using the element's copy constructor. + */ +template<class T, class Resize> Vector<T, Resize>:: + Vector(const Vector<T, Resize> &v) +{ + BaseTable::tabLen = v.tabLen; + BaseTable::allocLen = v.allocLen; + + if ( BaseTable::allocLen > 0 ) { + /* Allocate needed space. */ + BaseTable::data = (T*) malloc(sizeof(T) * BaseTable::allocLen); + if ( BaseTable::data == 0 ) + throw std::bad_alloc(); + + /* If there are any items in the src data, copy them in. */ + T *dst = BaseTable::data, *src = v.data; + for (long pos = 0; pos < BaseTable::tabLen; pos++, dst++, src++ ) + new(dst) T(*src); + } + else { + /* Nothing allocated. */ + BaseTable::data = 0; + } +} + +/** \fn Vector::~Vector() + * \brief Free all memory used by the vector. + * + * The vector is reset to zero elements. Destructors are called on all + * elements in the vector. The space allocated for the vector is freed. + */ + + +/** + * \brief Free all memory used by the vector. + * + * The vector is reset to zero elements. Destructors are called on all + * elements in the vector. The space allocated for the vector is freed. + */ +template<class T, class Resize> void Vector<T, Resize>:: + empty() +{ + if ( BaseTable::data != 0 ) { + /* Call All destructors. */ + T *pos = BaseTable::data; + for ( long i = 0; i < BaseTable::tabLen; pos++, i++ ) + pos->~T(); + + /* Free the data space. 
*/ + free( BaseTable::data ); + BaseTable::data = 0; + BaseTable::tabLen = BaseTable::allocLen = 0; + } +} + +/** + * \brief Set the contents of the vector to be len elements exactly. + * + * The vector becomes len elements in length. Destructors are called on any + * existing elements in the vector. Copy constructors are used to place the + * new elements in the vector. + */ +template<class T, class Resize> void Vector<T, Resize>:: + setAs(const T *val, long len) +{ + /* Call All destructors. */ + long i; + T *pos = BaseTable::data; + for ( i = 0; i < BaseTable::tabLen; pos++, i++ ) + pos->~T(); + + /* Adjust the allocated length. */ + if ( len < BaseTable::tabLen ) + downResize( len ); + else if ( len > BaseTable::tabLen ) + upResize( len ); + + /* Set the new data length to exactly len. */ + BaseTable::tabLen = len; + + /* Copy data in. */ + T *dst = BaseTable::data; + const T *src = val; + for ( i = 0; i < len; i++, dst++, src++ ) + new(dst) T(*src); +} + +/** + * \brief Set the vector to len copies of item. + * + * The vector becomes len elements in length. Destructors are called on any + * existing elements in the vector. The element's copy constructor is used to + * copy the item into the vector. + */ +template<class T, class Resize> void Vector<T, Resize>:: + setAsDup(const T &item, long len) +{ + /* Call All destructors. */ + T *pos = BaseTable::data; + for ( long i = 0; i < BaseTable::tabLen; pos++, i++ ) + pos->~T(); + + /* Adjust the allocated length. */ + if ( len < BaseTable::tabLen ) + downResize( len ); + else if ( len > BaseTable::tabLen ) + upResize( len ); + + /* Set the new data length to exactly len. */ + BaseTable::tabLen = len; + + /* Copy item in one spot at a time. */ + T *dst = BaseTable::data; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(item); +} + +/** + * \brief Set the vector to exactly len new items. + * + * The vector becomes len elements in length. Destructors are called on any + * existing elements in the vector. 
Default constructors are used to init the + * new items. + */ +template<class T, class Resize> void Vector<T, Resize>:: + setAsNew(long len) +{ + /* Call All destructors. */ + T *pos = BaseTable::data; + for ( long i = 0; i < BaseTable::tabLen; pos++, i++ ) + pos->~T(); + + /* Adjust the allocated length. */ + if ( len < BaseTable::tabLen ) + downResize( len ); + else if ( len > BaseTable::tabLen ) + upResize( len ); + + /* Set the new data length to exactly len. */ + BaseTable::tabLen = len; + + /* Create items using default constructor. */ + T *dst = BaseTable::data; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(); +} + + +/** + * \brief Replace len elements at position pos. + * + * If there are existing elements at the positions to be replaced, then + * destructors are called before the space is used. Copy constructors are used + * to place the elements into the vector. It is allowable for the pos and + * length to specify a replacement that overwrites existing elements and + * creates new ones. If pos is greater than the length of the vector then + * undefined behaviour results. If pos is negative, then it is treated as an + * offset relative to the length of the vector. + */ +template<class T, class Resize> void Vector<T, Resize>:: + replace(long pos, const T *val, long len) +{ + long endPos, i; + T *item; + + /* If we are given a negative position to replace at then + * treat it as a position relative to the length. */ + if ( pos < 0 ) + pos = BaseTable::tabLen + pos; + + /* The end is the one past the last item that we want + * to write to. */ + endPos = pos + len; + + /* Make sure we have enough space. */ + if ( endPos > BaseTable::tabLen ) { + upResize( endPos ); + + /* Delete any objects we need to delete. */ + item = BaseTable::data + pos; + for ( i = pos; i < BaseTable::tabLen; i++, item++ ) + item->~T(); + + /* We are extending the vector, set the new data length. 
*/ + BaseTable::tabLen = endPos; + } + else { + /* Delete any objects we need to delete. */ + item = BaseTable::data + pos; + for ( i = pos; i < endPos; i++, item++ ) + item->~T(); + } + + /* Copy data in using copy constructor. */ + T *dst = BaseTable::data + pos; + const T *src = val; + for ( i = 0; i < len; i++, dst++, src++ ) + new(dst) T(*src); +} + +/** + * \brief Replace at position pos with len copies of an item. + * + * If there are existing elements at the positions to be replaced, then + * destructors are called before the space is used. The copy constructor is + * used to place the element into this vector. It is allowable for the pos and + * length to specify a replacement that overwrites existing elements and + * creates new ones. If pos is greater than the length of the vector then + * undefined behaviour results. If pos is negative, then it is treated as an + * offset relative to the length of the vector. + */ +template<class T, class Resize> void Vector<T, Resize>:: + replaceDup(long pos, const T &val, long len) +{ + long endPos, i; + T *item; + + /* If we are given a negative position to replace at then + * treat it as a position relative to the length. */ + if ( pos < 0 ) + pos = BaseTable::tabLen + pos; + + /* The end is the one past the last item that we want + * to write to. */ + endPos = pos + len; + + /* Make sure we have enough space. */ + if ( endPos > BaseTable::tabLen ) { + upResize( endPos ); + + /* Delete any objects we need to delete. */ + item = BaseTable::data + pos; + for ( i = pos; i < BaseTable::tabLen; i++, item++ ) + item->~T(); + + /* We are extending the vector, set the new data length. */ + BaseTable::tabLen = endPos; + } + else { + /* Delete any objects we need to delete. */ + item = BaseTable::data + pos; + for ( i = pos; i < endPos; i++, item++ ) + item->~T(); + } + + /* Copy data in using copy constructor. 
*/ + T *dst = BaseTable::data + pos; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(val); +} + +/** + * \brief Replace at position pos with len new elements. + * + * If there are existing elements at the positions to be replaced, then + * destructors are called before the space is used. The default constructor is + * used to initialize the new elements. It is allowable for the pos and length + * to specify a replacement that overwrites existing elements and creates new + * ones. If pos is greater than the length of the vector then undefined + * behaviour results. If pos is negative, then it is treated as an offset + * relative to the length of the vector. + */ +template<class T, class Resize> void Vector<T, Resize>:: + replaceNew(long pos, long len) +{ + long endPos, i; + T *item; + + /* If we are given a negative position to replace at then + * treat it as a position relative to the length. */ + if ( pos < 0 ) + pos = BaseTable::tabLen + pos; + + /* The end is the one past the last item that we want + * to write to. */ + endPos = pos + len; + + /* Make sure we have enough space. */ + if ( endPos > BaseTable::tabLen ) { + upResize( endPos ); + + /* Destruct the existing items from pos up to the old length. The + * slots past the old length hold no objects yet. */ + item = BaseTable::data + pos; + for ( i = pos; i < BaseTable::tabLen; i++, item++ ) + item->~T(); + + /* We are extending the vector, set the new data length. */ + BaseTable::tabLen = endPos; + } + else { + /* The replacement lies entirely inside the vector. Destruct every + * item in the range being overwritten. */ + item = BaseTable::data + pos; + for ( i = pos; i < endPos; i++, item++ ) + item->~T(); + } + + /* Init the new items in place using the default constructor. */ + T *dst = BaseTable::data + pos; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(); +} + +/** + * \brief Remove len elements at position pos. + * + * Destructor is called on all elements removed. Elements to the right of pos + * are shifted len spaces to the left to take up the free space. 
If pos is + * greater than or equal to the length of the vector then undefined behavior + * results. If pos is negative then it is treated as an offset relative to the + * length of the vector. + */ +template<class T, class Resize> void Vector<T, Resize>:: + remove(long pos, long len) +{ + long newLen, lenToSlideOver, endPos; + T *dst, *item; + + /* If we are given a negative position to remove at then + * treat it as a position relative to the length. */ + if ( pos < 0 ) + pos = BaseTable::tabLen + pos; + + /* The first position after the last item deleted. */ + endPos = pos + len; + + /* The new data length. */ + newLen = BaseTable::tabLen - len; + + /* The place in the data we are deleting at. */ + dst = BaseTable::data + pos; + + /* Call Destructors. */ + item = dst; + for ( long i = 0; i < len; i += 1, item += 1 ) + item->~T(); + + /* Shift data over if necessary. */ + lenToSlideOver = BaseTable::tabLen - endPos; + if ( len > 0 && lenToSlideOver > 0 ) + memmove(dst, dst + len, sizeof(T)*lenToSlideOver); + + /* Shrink the data if necessary. */ + downResize( newLen ); + + /* Set the new data length. */ + BaseTable::tabLen = newLen; +} + +/** + * \brief Insert len elements at position pos. + * + * Elements in the vector from pos onward are shifted len spaces to the right. + * The copy constructor is used to place the elements into this vector. If pos + * is greater than the length of the vector then undefined behaviour results. + * If pos is negative then it is treated as an offset relative to the length + * of the vector. + */ +template<class T, class Resize> void Vector<T, Resize>:: + insert(long pos, const T *val, long len) +{ + /* If we are given a negative position to insert at then + * treat it as a position relative to the length. */ + if ( pos < 0 ) + pos = BaseTable::tabLen + pos; + + /* Calculate the new length. */ + long newLen = BaseTable::tabLen + len; + + /* Up resize, we are growing. 
*/ + upResize( newLen ); + + /* Shift over data at insert spot if needed. */ + if ( len > 0 && pos < BaseTable::tabLen ) { + memmove(BaseTable::data + pos + len, BaseTable::data + pos, + sizeof(T)*(BaseTable::tabLen-pos)); + } + + /* Copy data in element by element. */ + T *dst = BaseTable::data + pos; + const T *src = val; + for ( long i = 0; i < len; i++, dst++, src++ ) + new(dst) T(*src); + + /* Set the new length. */ + BaseTable::tabLen = newLen; +} + +/** + * \brief Insert len copies of item at position pos. + * + * Elements in the vector from pos onward are shifted len spaces to the right. + * The copy constructor is used to place the element into this vector. If pos + * is greater than the length of the vector then undefined behaviour results. + * If pos is negative then it is treated as an offset relative to the length + * of the vector. + */ +template<class T, class Resize> void Vector<T, Resize>:: + insertDup(long pos, const T &item, long len) +{ + /* If we are given a negative position to insert at then + * treat it as a position relative to the length. */ + if ( pos < 0 ) + pos = BaseTable::tabLen + pos; + + /* Calculate the new length. */ + long newLen = BaseTable::tabLen + len; + + /* Up resize, we are growing. */ + upResize( newLen ); + + /* Shift over data at insert spot if needed. */ + if ( len > 0 && pos < BaseTable::tabLen ) { + memmove(BaseTable::data + pos + len, BaseTable::data + pos, + sizeof(T)*(BaseTable::tabLen-pos)); + } + + /* Copy the data item in one at a time. */ + T *dst = BaseTable::data + pos; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(item); + + /* Set the new length. */ + BaseTable::tabLen = newLen; +} + +/** + * \brief Insert len new elements using the default constructor. + * + * Elements in the vector from pos onward are shifted len spaces to the right. + * Default constructors are used to init the new elements. If pos is off the + * end of the vector then undefined behaviour results. 
If pos is negative then + * it is treated as an offset relative to the length of the vector. + */ +template<class T, class Resize> void Vector<T, Resize>:: + insertNew(long pos, long len) +{ + /* If we are given a negative position to insert at then + * treat it as a position relative to the length. */ + if ( pos < 0 ) + pos = BaseTable::tabLen + pos; + + /* Calculate the new length. */ + long newLen = BaseTable::tabLen + len; + + /* Up resize, we are growing. */ + upResize( newLen ); + + /* Shift over data at insert spot if needed. */ + if ( len > 0 && pos < BaseTable::tabLen ) { + memmove(BaseTable::data + pos + len, BaseTable::data + pos, + sizeof(T)*(BaseTable::tabLen-pos)); + } + + /* Init new data with default constructors. */ + T *dst = BaseTable::data + pos; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(); + + /* Set the new length. */ + BaseTable::tabLen = newLen; +} + +/* Makes space for len items, Does not init the items in any way. If pos is + * greater than the length of the vector then undefined behaviour results. + * Updates the length of the vector. */ +template<class T, class Resize> void Vector<T, Resize>:: + makeRawSpaceFor(long pos, long len) +{ + /* Calculate the new length. */ + long newLen = BaseTable::tabLen + len; + + /* Up resize, we are growing. */ + upResize( newLen ); + + /* Shift over data at insert spot if needed. */ + if ( len > 0 && pos < BaseTable::tabLen ) { + memmove(BaseTable::data + pos + len, BaseTable::data + pos, + sizeof(T)*(BaseTable::tabLen-pos)); + } + + /* Save the new length. */ + BaseTable::tabLen = newLen; +} + +#ifdef AAPL_NAMESPACE +} +#endif + +#endif /* _AAPL_VECTOR_H */ diff --git a/common/Makefile.in b/common/Makefile.in new file mode 100644 index 0000000..a08ce60 --- /dev/null +++ b/common/Makefile.in @@ -0,0 +1,71 @@ +# +# Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> +# + +# This file is part of Ragel. 
+# +# Ragel is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Ragel is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ragel; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +INCS = -Iaapl +DEFS = + +CFLAGS = -g -Wall +LDFLAGS = + +SUBDIRS = codegen test examples doc + +CC_SRCS = \ + rlparse.cpp rlscan.cpp main.cpp parsetree.cpp \ + parsedata.cpp fsmstate.cpp fsmbase.cpp fsmattach.cpp \ + fsmmin.cpp fsmgraph.cpp fsmap.cpp xmlcodegen.cpp + +GEN_SRC = version.h + +LIBS = @LIBS@ +PREFIX = @prefix@ + +#************************************* + +include ../version.mk + +# Programs +CXX = @CXX@ + +# What kind of header does bison put out? +BISON_HEAD_SUFFIX = @BISON_HEAD_SUFFIX@ + +# Get objects and dependencies from sources. +RAGEL_OBJS = $(RAGEL_CC_SRCS:%.cpp=%.o) +RLCG_OBJS = $(RLCG_CC_SRCS:%.cpp=%.o) +DEPS = $(RAGEL_CC_SRCS:%.cpp=.%.d) $(RLCG_CC_SRCS:%.cpp=.%.d) + +# Rules. 
+all: $(GEN_SRC) + +version.h: ../version.mk + echo '#define VERSION "$(VERSION)"' > version.h + echo '#define PUBDATE "$(PUBDATE)"' >> version.h + +%.o: %.cpp + @$(CXX) -M $(DEFS) $(INCS) $< > .$*.d + $(CXX) -c $(CFLAGS) $(DEFS) $(INCS) -o $@ $< + +distclean: clean + rm -f Makefile config.h + +clean: + rm -f tags .*.d *.o version.h + +-include $(DEPS) diff --git a/common/buffer.h b/common/buffer.h new file mode 100644 index 0000000..99c4e82 --- /dev/null +++ b/common/buffer.h @@ -0,0 +1,55 @@ +/* + * Copyright 2003 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _BUFFER_H +#define _BUFFER_H + +#define BUFFER_INITIAL_SIZE 4096 + +/* An automatically grown buffer for collecting tokens. Always reuses space; + * never down resizes. 
*/ +struct Buffer +{ + Buffer() + { + /* Running out of memory is fatal here: append and the destructor both + * assume data points at allocated bytes of storage. */ + data = (char*) malloc( BUFFER_INITIAL_SIZE ); + if ( data == 0 ) + abort(); + allocated = BUFFER_INITIAL_SIZE; + length = 0; + } + ~Buffer() { free(data); } + + /* Append one character, doubling the storage when it is full. */ + void append( char p ) + { + if ( length == allocated ) { + /* Grow through a temporary so that a failed realloc neither leaks + * the old block nor leaves allocated out of step with data. */ + int newAllocated = allocated * 2; + char *newData = (char*) realloc( data, newAllocated ); + if ( newData == 0 ) + abort(); + data = newData; + allocated = newAllocated; + } + data[length++] = p; + } + + /* Forget the contents but keep the storage for reuse. */ + void clear() { length = 0; } + + char *data; + int allocated; + int length; +}; + +#endif /* _BUFFER_H */ diff --git a/common/common.cpp b/common/common.cpp new file mode 100644 index 0000000..db23235 --- /dev/null +++ b/common/common.cpp @@ -0,0 +1,193 @@ +/* + * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "common.h" + +/* Whether plain char is signed is up to the compiler doing the build, so the + * signedness flag of the C char entry is derived from CHAR_MIN rather than + * assumed. */ +HostType hostTypesC[] = +{ + { "char", 0, (CHAR_MIN != 0), CHAR_MIN, CHAR_MAX, sizeof(char) }, + { "unsigned", "char", false, 0, UCHAR_MAX, sizeof(unsigned char) }, + { "short", 0, true, SHRT_MIN, SHRT_MAX, sizeof(short) }, + { "unsigned", "short", false, 0, USHRT_MAX, sizeof(unsigned short) }, + { "int", 0, true, INT_MIN, INT_MAX, sizeof(int) }, + { "unsigned", "int", false, 0, UINT_MAX, sizeof(unsigned int) }, + { "long", 0, true, LONG_MIN, LONG_MAX, sizeof(long) }, + { "unsigned", "long", false, 0, ULONG_MAX, sizeof(unsigned long) } +}; + +/* The byte entry is flagged signed, so its range comes from SCHAR_MIN and + * SCHAR_MAX. CHAR_MIN and CHAR_MAX would describe 0..UCHAR_MAX on a build + * machine where plain char is unsigned. */ +HostType hostTypesD[] = +{ + { "byte", 0, true, SCHAR_MIN, SCHAR_MAX, 1 }, + { "ubyte", 0, false, 0, UCHAR_MAX, 1 }, + { "char", 0, false, 0, UCHAR_MAX, 1 }, + { "short", 0, true, SHRT_MIN, SHRT_MAX, 2 }, + { "ushort", 0, false, 0, USHRT_MAX, 2 }, + { "wchar", 0, false, 0, USHRT_MAX, 2 }, + { "int", 0, true, INT_MIN, INT_MAX, 4 }, + { "uint", 0, false, 0, UINT_MAX, 4 }, + { "dchar", 0, false, 0, UINT_MAX, 4 } +}; + +/* As for D, the signed byte entry takes its range from SCHAR_MIN and + * SCHAR_MAX. */ +HostType hostTypesJava[] = +{ + { "byte", 0, true, SCHAR_MIN, SCHAR_MAX, 1 }, + { "short", 0, true, SHRT_MIN, SHRT_MAX, 2 }, + { "char", 0, false, 0, USHRT_MAX, 2 }, + { "int", 0, true, INT_MIN, INT_MAX, 4 }, +}; + +/* Each language lists its types, the number of types, the default alphabet + * type and whether unsigned types are spelled with an explicit keyword. */ +HostLang hostLangC = { hostTypesC, 8, hostTypesC+0, true }; +HostLang hostLangD = { hostTypesD, 9, hostTypesD+2, true }; +HostLang hostLangJava = { hostTypesJava, 4, hostTypesJava+2, false }; + +HostLang *hostLang = &hostLangC; +HostLangType hostLangType = CCode; + +/* Construct a new parameter checker for paramSpec. Scanning starts at + * argv[1]. The result fields start out cleared so that they hold defined + * values before the first call to check. */ +ParamCheck::ParamCheck(char *paramSpec, int argc, char **argv) +: + parameterArg(0), + parameter(0), + state(noparam), + argOffset(0), + curArg(0), + iCurArg(1), + paramSpec(paramSpec), + argc(argc), + argv(argv) +{ +} + +/* Check a single option. Returns the index of the next parameter. 
Sets parameter to + * the option character that was examined (0 when the argument is not an + * option) and parameterArg to the argument of the option if there is one, + * NULL otherwise. The outcome is recorded in state. The return value is false + * once the argument list is exhausted and true otherwise. */ +bool ParamCheck::check() +{ + bool requiresParam; + + if ( iCurArg >= argc ) { /* Off the end of the arg list. */ + state = noparam; + return false; + } + + if ( argOffset != 0 && *argOffset == 0 ) { + /* We are at the end of an arg string. */ + iCurArg += 1; + if ( iCurArg >= argc ) { + state = noparam; + return false; + } + argOffset = 0; + } + + if ( argOffset == 0 ) { + /* Set the current arg. */ + curArg = argv[iCurArg]; + + /* We are at the beginning of an arg string. Anything that is not a + * dash followed by at least one character is reported as a plain + * argument: state is noparam and the caller finds it in curArg. */ + if ( argv[iCurArg] == 0 || /* Argv[iCurArg] is null. */ + argv[iCurArg][0] != '-' || /* Not a param. */ + argv[iCurArg][1] == 0 ) { /* Only a dash. */ + parameter = 0; + parameterArg = 0; + + iCurArg += 1; + state = noparam; + return true; + } + argOffset = argv[iCurArg] + 1; + } + + /* Get the arg char. */ + char argChar = *argOffset; + + /* Loop over all the parms and look for a match. */ + char *pSpec = paramSpec; + while ( *pSpec != 0 ) { + char pSpecChar = *pSpec; + + /* If there is a ':' following the char then + * it requires a parm. If a parm is required + * then move ahead two in the parmspec. Otherwise + * move ahead one in the parm spec. */ + if ( pSpec[1] == ':' ) { + requiresParam = true; + pSpec += 2; + } + else { + requiresParam = false; + pSpec += 1; + } + + /* Do we have a match? */ + if ( argChar == pSpecChar ) { + if ( requiresParam ) { + if ( argOffset[1] == 0 ) { + /* The param must follow. */ + if ( iCurArg + 1 == argc ) { + /* We are the last arg so there + * cannot be a parameter to it. */ + parameter = argChar; + parameterArg = 0; + iCurArg += 1; + argOffset = 0; + state = invalid; + return true; + } + else { + /* The parameter to the arg is the next arg. */ + parameter = pSpecChar; + parameterArg = argv[iCurArg + 1]; + iCurArg += 2; + argOffset = 0; + state = match; + return true; + } + } + else { + /* The param for the arg is built in. 
*/ + parameter = pSpecChar; + parameterArg = argOffset + 1; + iCurArg += 1; + argOffset = 0; + state = match; + return true; + } + } + else { + /* Good, we matched the parm and no + * arg is required. */ + parameter = pSpecChar; + parameterArg = 0; + argOffset += 1; + state = match; + return true; + } + } + } + + /* We did not find a match. Bad Argument. */ + parameter = argChar; + parameterArg = 0; + argOffset += 1; + state = invalid; + return true; +} + + diff --git a/common/common.h b/common/common.h new file mode 100644 index 0000000..077a3f6 --- /dev/null +++ b/common/common.h @@ -0,0 +1,271 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _COMMON_H +#define _COMMON_H + +#include <climits> + +typedef unsigned long long Size; + +struct Key +{ +private: + long key; + +public: + friend inline Key operator+(const Key key1, const Key key2); + friend inline Key operator-(const Key key1, const Key key2); + friend inline Key operator/(const Key key1, const Key key2); + friend inline long operator&(const Key key1, const Key key2); + + friend inline bool operator<( const Key key1, const Key key2 ); + friend inline bool operator<=( const Key key1, const Key key2 ); + friend inline bool operator>( const Key key1, const Key key2 ); + friend inline bool operator>=( const Key key1, const Key key2 ); + friend inline bool operator==( const Key key1, const Key key2 ); + friend inline bool operator!=( const Key key1, const Key key2 ); + + friend struct KeyOps; + + Key( ) {} /* Leaves the key value uninitialized. */ + Key( const Key &key ) : key(key.key) {} + Key( long key ) : key(key) {} + + /* Returns the value used to represent the key. This value must be + * interpreted based on signedness. */ + long getVal() const { return key; }; + + /* Returns the key cast to a long long. This form of the key does not + * require any signedness interpretation. */ + long long getLongLong() const; + + bool isUpper() const { return ( 'A' <= key && key <= 'Z' ); } + bool isLower() const { return ( 'a' <= key && key <= 'z' ); } + bool isPrintable() const { return ( 32 <= key && key < 127 ); } + + Key toUpper() const /* Applies the letter offset without checking isLower(). */ + { return Key( 'A' + ( key - 'a' ) ); } + Key toLower() const /* Applies the letter offset without checking isUpper(). */ + { return Key( 'a' + ( key - 'A' ) ); } + + void operator+=( const Key other ) + { + /* FIXME: must be made aware of isSigned. */ + key += other.key; + } + + void operator-=( const Key other ) + { + /* FIXME: must be made aware of isSigned. 
*/ + key -= other.key; + } + + void operator|=( const Key other ) + { + /* FIXME: must be made aware of isSigned. */ + key |= other.key; + } + + /* Decrement and increment. Needed only for ranges. */ + inline void decrement(); + inline void increment(); +}; + +/* Describes one type of a host language: its name (in one or two words), + * its signedness, the range of values it holds and its size. */ +struct HostType +{ + char *data1; + char *data2; + bool isSigned; + long long minVal; + long long maxVal; + unsigned int size; +}; + +/* Describes a host language by the table of types it offers. */ +struct HostLang +{ + HostType *hostTypes; + int numHostTypes; + HostType *defaultAlphType; + bool explicitUnsigned; +}; + + +/* Target language. */ +enum HostLangType +{ + CCode, + DCode, + JavaCode +}; + +extern HostLang *hostLang; +extern HostLangType hostLangType; + +extern HostLang hostLangC; +extern HostLang hostLangD; +extern HostLang hostLangJava; + +/* An abstraction of the key operators that manages key operations such as + * comparison and increment according to the signedness of the key. */ +struct KeyOps +{ + /* Default to signed alphabet. */ + KeyOps() : + isSigned(true), + alphType(0) + {} + + /* Use the given signedness. No alphabet type is selected until + * setAlphType is called, so alphType starts out null here too. */ + KeyOps( bool isSigned ) + : + isSigned(isSigned), + alphType(0) + {} + + bool isSigned; + Key minKey, maxKey; + HostType *alphType; + + /* Select the alphabet type and take the signedness and the key range + * from it. */ + void setAlphType( HostType *alphType ) + { + this->alphType = alphType; + isSigned = alphType->isSigned; + if ( isSigned ) { + minKey = (long) alphType->minVal; + maxKey = (long) alphType->maxVal; + } + else { + minKey = (long) (unsigned long) alphType->minVal; + maxKey = (long) (unsigned long) alphType->maxVal; + } + } + + /* Compute the distance between two keys. */ + Size span( Key key1, Key key2 ) + { + return isSigned ? 
+ (unsigned long long)( + (long long)key2.key - + (long long)key1.key + 1) : + (unsigned long long)( + (unsigned long)key2.key) - + (unsigned long long)((unsigned long)key1.key) + 1; + } + + Size alphSize() + { return span( minKey, maxKey ); } + + HostType *typeSubsumes( long long maxVal ) + { + for ( int i = 0; i < hostLang->numHostTypes; i++ ) { + if ( maxVal <= hostLang->hostTypes[i].maxVal ) + return hostLang->hostTypes + i; + } + return 0; + } + + HostType *typeSubsumes( bool isSigned, long long maxVal ) + { + for ( int i = 0; i < hostLang->numHostTypes; i++ ) { + if ( ( isSigned && hostLang->hostTypes[i].isSigned || !isSigned ) && + maxVal <= hostLang->hostTypes[i].maxVal ) + return hostLang->hostTypes + i; + } + return 0; + } +}; + +extern KeyOps *keyOps; + +inline bool operator<( const Key key1, const Key key2 ) +{ + return keyOps->isSigned ? key1.key < key2.key : + (unsigned long)key1.key < (unsigned long)key2.key; +} + +inline bool operator<=( const Key key1, const Key key2 ) +{ + return keyOps->isSigned ? key1.key <= key2.key : + (unsigned long)key1.key <= (unsigned long)key2.key; +} + +inline bool operator>( const Key key1, const Key key2 ) +{ + return keyOps->isSigned ? key1.key > key2.key : + (unsigned long)key1.key > (unsigned long)key2.key; +} + +inline bool operator>=( const Key key1, const Key key2 ) +{ + return keyOps->isSigned ? key1.key >= key2.key : + (unsigned long)key1.key >= (unsigned long)key2.key; +} + +inline bool operator==( const Key key1, const Key key2 ) +{ + return key1.key == key2.key; +} + +inline bool operator!=( const Key key1, const Key key2 ) +{ + return key1.key != key2.key; +} + +/* Decrement. Needed only for ranges. */ +inline void Key::decrement() +{ + key = keyOps->isSigned ? key - 1 : ((unsigned long)key)-1; +} + +/* Increment. Needed only for ranges. */ +inline void Key::increment() +{ + key = keyOps->isSigned ? 
key+1 : ((unsigned long)key)+1; +} +/* Widen the key to long long. For an unsigned alphabet the key is first + * converted to unsigned long, then to long long. */ +inline long long Key::getLongLong() const +{ + return keyOps->isSigned ? (long long)key : (long long)(unsigned long)key; +} +/* The arithmetic operators below work on the raw key values and do not yet + * consult keyOps->isSigned (see the FIXME notes). */ +inline Key operator+(const Key key1, const Key key2) +{ + /* FIXME: must be made aware of isSigned. */ + return Key( key1.key + key2.key ); +} + +inline Key operator-(const Key key1, const Key key2) +{ + /* FIXME: must be made aware of isSigned. */ + return Key( key1.key - key2.key ); +} +/* Note: yields a plain long, not a Key. */ +inline long operator&(const Key key1, const Key key2) +{ + /* FIXME: must be made aware of isSigned. */ + return key1.key & key2.key; +} +/* The long quotient is converted back to a Key implicitly on return. */ +inline Key operator/(const Key key1, const Key key2) +{ + /* FIXME: must be made aware of isSigned. */ + return key1.key / key2.key; +} + +#endif /* _COMMON_H */ diff --git a/common/config.h.in b/common/config.h.in new file mode 100644 index 0000000..0285bb0 --- /dev/null +++ b/common/config.h.in @@ -0,0 +1,33 @@ +/* + * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _CONFIG_H +#define _CONFIG_H + +/* Compilers. 
*/ +#undef GDC +#undef GOBJC +#undef CXX +#undef CC +#undef JAVAC +#undef TXL + +#endif /* _CONFIG_H */ diff --git a/common/pcheck.h b/common/pcheck.h new file mode 100644 index 0000000..0b836a5 --- /dev/null +++ b/common/pcheck.h @@ -0,0 +1,49 @@ +/* + * Copyright 2001, 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _PCHECK_H +#define _PCHECK_H +/* Command-line parameter checker constructed from a parameter spec and the + * program's argc/argv. Only the declaration is visible here; the notes on + * check() and state are assumptions to confirm against the implementation. */ +class ParamCheck +{ +public: + ParamCheck(char *paramSpec, int argc, char **argv); + + bool check(); /* Presumably examines the next parameter and updates the fields below -- TODO confirm. */ + + char *parameterArg; /* The argument to the parameter. */ + char parameter; /* The parameter matched. */ + enum { match, invalid, noparam } state; /* Classification of the parameter; presumably set by check() -- TODO confirm. */ + + char *argOffset; /* If we are reading params inside an + * arg this points to the offset. */ + + char *curArg; /* Pointer to the current arg. */ + int iCurArg; /* Index to the current arg. */ + +private: + char *paramSpec; /* Parameter spec supplied by the coder. */ + int argc; /* Argument data from the command line. */ + char **argv; /* Argument vector that goes with argc. */ + +}; + +#endif /* _PCHECK_H */ diff --git a/configure b/configure new file mode 100755 index 0000000..130108c --- /dev/null +++ b/configure @@ -0,0 +1,3991 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.59. 
+# +# Copyright (C) 2003 Free Software Foundation, Inc. +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## --------------------- ## +## M4sh Initialization. ## +## --------------------- ## + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' +elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then + set -o posix +fi +DUALCASE=1; export DUALCASE # for MKS sh + +# Support unset when possible. +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# Work around bugs in pre-3.0 UWIN ksh. +$as_unset ENV MAIL MAILPATH +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +for as_var in \ + LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ + LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ + LC_TELEPHONE LC_TIME +do + if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then + eval $as_var=C; export $as_var + else + $as_unset $as_var + fi +done + +# Required to use basename. +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)$' \| \ + . : '\(.\)' 2>/dev/null || +echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; } + /^X\/\(\/\/\)$/{ s//\1/; q; } + /^X\/\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + + +# PATH needs CR, and LINENO needs CR and PATH. +# Avoid depending upon Character Ranges. 
+as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! /bin/sh" >conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh +fi + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" || { + # Find who we are. Look in the path if we contain no path at all + # relative or not. + case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done + + ;; + esac + # We did not find ourselves, most probably we were run as `sh COMMAND' + # in which case we are not to be found in the path. + if test "x$as_myself" = x; then + as_myself=$0 + fi + if test ! -f "$as_myself"; then + { echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2 + { (exit 1); exit 1; }; } + fi + case $CONFIG_SHELL in + '') + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for as_base in sh bash ksh sh5; do + case $as_dir in + /*) + if ("$as_dir/$as_base" -c ' + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then + $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; } + $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; } + CONFIG_SHELL=$as_dir/$as_base + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$0" ${1+"$@"} + fi;; + esac + done +done +;; + esac + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line before each line; the second 'sed' does the real + # work. The second script uses 'N' to pair each line-number line + # with the numbered line, and appends trailing '-' during + # substitution so that $LINENO is not a special case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-) + sed '=' <$as_myself | + sed ' + N + s,$,-, + : loop + s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3, + t loop + s,-$,, + s,^['$as_cr_digits']*\n,, + ' >$as_me.lineno && + chmod +x $as_me.lineno || + { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensible to this). + . ./$as_me.lineno + # Exit status is that of the last command. 
+ exit +} + + +case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in + *c*,-n*) ECHO_N= ECHO_C=' +' ECHO_T=' ' ;; + *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; + *) ECHO_N= ECHO_C='\c' ECHO_T= ;; +esac + +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +echo >conf$$.file +if ln -s conf$$.file conf$$ 2>/dev/null; then + # We could just check for DJGPP; but this test a) works b) is more generic + # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04). + if test -f conf$$.exe; then + # Don't use ln at all; we don't have any links + as_ln_s='cp -p' + else + as_ln_s='ln -s' + fi +elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.file + +if mkdir -p . 2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_executable_p="test -f" + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +# IFS +# We need space, tab and new line, in precisely that order. +as_nl=' +' +IFS=" $as_nl" + +# CDPATH. +$as_unset CDPATH + + +# Name of the host. +# hostname on some systems (SVR3.2, Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +exec 6>&1 + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_config_libobj_dir=. +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= +SHELL=${CONFIG_SHELL-/bin/sh} + +# Maximum number of lines to put in a shell here document. +# This variable seems obsolete. It should probably be removed, and +# only ac_max_sed_lines should be used. +: ${ac_max_here_lines=38} + +# Identity of this package. 
+PACKAGE_NAME= +PACKAGE_TARNAME= +PACKAGE_VERSION= +PACKAGE_STRING= +PACKAGE_BUGREPORT= + +ac_unique_file="ragel/main.cpp" +ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS BUILD_PARSERS CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CXX CXXFLAGS ac_ct_CXX SET_MAKE FLEX GPERF BISON GDC GOBJC JAVAC TXL LIBOBJS LTLIBOBJS' +ac_subst_files='' + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datadir='${prefix}/share' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +libdir='${exec_prefix}/lib' +includedir='${prefix}/include' +oldincludedir='/usr/include' +infodir='${prefix}/info' +mandir='${prefix}/man' + +ac_prev= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval "$ac_prev=\$ac_option" + ac_prev= + continue + fi + + ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'` + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case $ac_option in + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad | --data | --dat | --da) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ + | --da=*) + datadir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid feature name: $ac_feature" >&2 + { (exit 1); exit 1; }; } + ac_feature=`echo $ac_feature | sed 's/-/_/g'` + eval "enable_$ac_feature=no" ;; + + -enable-* | --enable-*) + ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. 
+ expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid feature name: $ac_feature" >&2 + { (exit 1); exit 1; }; } + ac_feature=`echo $ac_feature | sed 's/-/_/g'` + case $ac_option in + *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; + *) ac_optarg=yes ;; + esac + eval "enable_$ac_feature='$ac_optarg'" ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. + with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | 
--libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst \ + | --locals | --local | --loca | --loc | --lo) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* \ + | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) + localstatedir=$ac_optarg ;; + + -mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + 
-program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | --program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + 
site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid package name: $ac_package" >&2 + { (exit 1); exit 1; }; } + ac_package=`echo $ac_package| sed 's/-/_/g'` + case $ac_option in + *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; + *) ac_optarg=yes ;; + esac + eval "with_$ac_package='$ac_optarg'" ;; + + -without-* | --without-*) + ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid package name: $ac_package" >&2 + { (exit 1); exit 1; }; } + ac_package=`echo $ac_package | sed 's/-/_/g'` + eval "with_$ac_package=no" ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) { echo "$as_me: error: unrecognized option: $ac_option +Try \`$0 --help' for more information." >&2 + { (exit 1); exit 1; }; } + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid variable name: $ac_envvar" >&2 + { (exit 1); exit 1; }; } + ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` + eval "$ac_envvar='$ac_optarg'" + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option} + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + { echo "$as_me: error: missing argument to $ac_option" >&2 + { (exit 1); exit 1; }; } +fi + +# Be sure to have absolute paths. 
+for ac_var in exec_prefix prefix +do + eval ac_val=$`echo $ac_var` + case $ac_val in + [\\/$]* | ?:[\\/]* | NONE | '' ) ;; + *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 + { (exit 1); exit 1; }; };; + esac +done + +# Be sure to have absolute paths. +for ac_var in bindir sbindir libexecdir datadir sysconfdir sharedstatedir \ + localstatedir libdir includedir oldincludedir infodir mandir +do + eval ac_val=$`echo $ac_var` + case $ac_val in + [\\/$]* | ?:[\\/]* ) ;; + *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 + { (exit 1); exit 1; }; };; + esac +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host. + If a cross compiler is detected then cross compile mode will be used." >&2 + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then its parent. + ac_confdir=`(dirname "$0") 2>/dev/null || +$as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$0" : 'X\(//\)[^/]' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$0" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r $srcdir/$ac_unique_file; then + srcdir=.. 
+ fi +else + ac_srcdir_defaulted=no +fi +if test ! -r $srcdir/$ac_unique_file; then + if test "$ac_srcdir_defaulted" = yes; then + { echo "$as_me: error: cannot find sources ($ac_unique_file) in $ac_confdir or .." >&2 + { (exit 1); exit 1; }; } + else + { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2 + { (exit 1); exit 1; }; } + fi +fi +(cd $srcdir && test -r ./$ac_unique_file) 2>/dev/null || + { echo "$as_me: error: sources are in $srcdir, but \`cd $srcdir' does not work" >&2 + { (exit 1); exit 1; }; } +srcdir=`echo "$srcdir" | sed 's%\([^\\/]\)[\\/]*$%\1%'` +ac_env_build_alias_set=${build_alias+set} +ac_env_build_alias_value=$build_alias +ac_cv_env_build_alias_set=${build_alias+set} +ac_cv_env_build_alias_value=$build_alias +ac_env_host_alias_set=${host_alias+set} +ac_env_host_alias_value=$host_alias +ac_cv_env_host_alias_set=${host_alias+set} +ac_cv_env_host_alias_value=$host_alias +ac_env_target_alias_set=${target_alias+set} +ac_env_target_alias_value=$target_alias +ac_cv_env_target_alias_set=${target_alias+set} +ac_cv_env_target_alias_value=$target_alias +ac_env_CC_set=${CC+set} +ac_env_CC_value=$CC +ac_cv_env_CC_set=${CC+set} +ac_cv_env_CC_value=$CC +ac_env_CFLAGS_set=${CFLAGS+set} +ac_env_CFLAGS_value=$CFLAGS +ac_cv_env_CFLAGS_set=${CFLAGS+set} +ac_cv_env_CFLAGS_value=$CFLAGS +ac_env_LDFLAGS_set=${LDFLAGS+set} +ac_env_LDFLAGS_value=$LDFLAGS +ac_cv_env_LDFLAGS_set=${LDFLAGS+set} +ac_cv_env_LDFLAGS_value=$LDFLAGS +ac_env_CPPFLAGS_set=${CPPFLAGS+set} +ac_env_CPPFLAGS_value=$CPPFLAGS +ac_cv_env_CPPFLAGS_set=${CPPFLAGS+set} +ac_cv_env_CPPFLAGS_value=$CPPFLAGS +ac_env_CXX_set=${CXX+set} +ac_env_CXX_value=$CXX +ac_cv_env_CXX_set=${CXX+set} +ac_cv_env_CXX_value=$CXX +ac_env_CXXFLAGS_set=${CXXFLAGS+set} +ac_env_CXXFLAGS_value=$CXXFLAGS +ac_cv_env_CXXFLAGS_set=${CXXFLAGS+set} +ac_cv_env_CXXFLAGS_value=$CXXFLAGS + +# +# Report the --help message. 
+# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures this package to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. + +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +_ACEOF + + cat <<_ACEOF +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --datadir=DIR read-only architecture-independent data [PREFIX/share] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --infodir=DIR info documentation [PREFIX/info] + --mandir=DIR man documentation [PREFIX/man] +_ACEOF + + cat <<\_ACEOF +_ACEOF +fi + +if test -n "$ac_init_help"; then + + cat <<\_ACEOF + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a + nonstandard directory <lib dir> + CPPFLAGS C/C++ preprocessor flags, e.g. -I<include dir> if you have + headers in a nonstandard directory <include dir> + CXX C++ compiler command + CXXFLAGS C++ compiler flags + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +_ACEOF +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + ac_popdir=`pwd` + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d $ac_dir || continue + ac_builddir=. + +if test "$ac_dir" != .; then + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A "../" for each directory in $ac_dir_suffix. + ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` +else + ac_dir_suffix= ac_top_builddir= +fi + +case $srcdir in + .) # No --srcdir option. We are building in place. + ac_srcdir=. + if test -z "$ac_top_builddir"; then + ac_top_srcdir=. 
+ else + ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` + fi ;; + [\\/]* | ?:[\\/]* ) # Absolute path. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir ;; + *) # Relative path. + ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_builddir$srcdir ;; +esac + +# Do not use `cd foo && pwd` to compute absolute paths, because +# the directories may not exist. +case `pwd` in +.) ac_abs_builddir="$ac_dir";; +*) + case "$ac_dir" in + .) ac_abs_builddir=`pwd`;; + [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";; + *) ac_abs_builddir=`pwd`/"$ac_dir";; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_builddir=${ac_top_builddir}.;; +*) + case ${ac_top_builddir}. in + .) ac_abs_top_builddir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;; + *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_srcdir=$ac_srcdir;; +*) + case $ac_srcdir in + .) ac_abs_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;; + *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_srcdir=$ac_top_srcdir;; +*) + case $ac_top_srcdir in + .) ac_abs_top_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;; + *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;; + esac;; +esac + + cd $ac_dir + # Check for guested configure; otherwise get Cygnus style configure. 
+ if test -f $ac_srcdir/configure.gnu; then + echo + $SHELL $ac_srcdir/configure.gnu --help=recursive + elif test -f $ac_srcdir/configure; then + echo + $SHELL $ac_srcdir/configure --help=recursive + elif test -f $ac_srcdir/configure.ac || + test -f $ac_srcdir/configure.in; then + echo + $ac_configure --help + else + echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi + cd "$ac_popdir" + done +fi + +test -n "$ac_init_help" && exit 0 +if $ac_init_version; then + cat <<\_ACEOF + +Copyright (C) 2003 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit 0 +fi +exec 5>config.log +cat >&5 <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by $as_me, which was +generated by GNU Autoconf 2.59. Invocation command line was + + $ $0 $@ + +_ACEOF +{ +cat <<_ASUNAME +## --------- ## +## Platform. 
## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +hostinfo = `(hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + echo "PATH: $as_dir" +done + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. 
+ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_sep= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*) + ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;; + 2) + ac_configure_args1="$ac_configure_args1 '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + ac_configure_args="$ac_configure_args$ac_sep'$ac_arg'" + # Get rid of the leading space. + ac_sep=" " + ;; + esac + done +done +$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; } +$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; } + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Be sure not to use single quotes in there, as some shells, +# such as our DU 5.0 friend, will then `close' the trap. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + cat <<\_ASBOX +## ---------------- ## +## Cache variables. 
## +## ---------------- ## +_ASBOX + echo + # The following way of writing the cache mishandles newlines in values, +{ + (set) 2>&1 | + case `(ac_space='"'"' '"'"'; set | grep ac_space) 2>&1` in + *ac_space=\ *) + sed -n \ + "s/'"'"'/'"'"'\\\\'"'"''"'"'/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='"'"'\\2'"'"'/p" + ;; + *) + sed -n \ + "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p" + ;; + esac; +} + echo + + cat <<\_ASBOX +## ----------------- ## +## Output variables. ## +## ----------------- ## +_ASBOX + echo + for ac_var in $ac_subst_vars + do + eval ac_val=$`echo $ac_var` + echo "$ac_var='"'"'$ac_val'"'"'" + done | sort + echo + + if test -n "$ac_subst_files"; then + cat <<\_ASBOX +## ------------- ## +## Output files. ## +## ------------- ## +_ASBOX + echo + for ac_var in $ac_subst_files + do + eval ac_val=$`echo $ac_var` + echo "$ac_var='"'"'$ac_val'"'"'" + done | sort + echo + fi + + if test -s confdefs.h; then + cat <<\_ASBOX +## ----------- ## +## confdefs.h. ## +## ----------- ## +_ASBOX + echo + sed "/^$/d" confdefs.h | sort + echo + fi + test "$ac_signal" != 0 && + echo "$as_me: caught signal $ac_signal" + echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core && + rm -rf conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status + ' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -rf conftest* confdefs.h +# AIX cpp loses on an empty file, so make sure it contains at least a newline. +echo >confdefs.h + +# Predefined preprocessor variables. 
+ +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer explicitly selected file to automatically selected ones. +if test -z "$CONFIG_SITE"; then + if test "x$prefix" != xNONE; then + CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site" + else + CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" + fi +fi +for ac_site_file in $CONFIG_SITE; do + if test -r "$ac_site_file"; then + { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5 +echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special + # files actually), so we avoid doing that. + if test -f "$cache_file"; then + { echo "$as_me:$LINENO: loading cache $cache_file" >&5 +echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . $cache_file;; + *) . ./$cache_file;; + esac + fi +else + { echo "$as_me:$LINENO: creating cache $cache_file" >&5 +echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. 
+ac_cache_corrupted=false +for ac_var in `(set) 2>&1 | + sed -n 's/^ac_env_\([a-zA-Z_0-9]*\)_set=.*/\1/p'`; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val="\$ac_cv_env_${ac_var}_value" + eval ac_new_val="\$ac_env_${ac_var}_value" + case $ac_old_set,$ac_new_set in + set,) + { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5 +echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5 +echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + { echo "$as_me:$LINENO: former value: $ac_old_val" >&5 +echo "$as_me: former value: $ac_old_val" >&2;} + { echo "$as_me:$LINENO: current value: $ac_new_val" >&5 +echo "$as_me: current value: $ac_new_val" >&2;} + ac_cache_corrupted=: + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*) + ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. 
+ *) ac_configure_args="$ac_configure_args '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5 +echo "$as_me: error: changes in the environment can compromise the build" >&2;} + { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5 +echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;} + { (exit 1); exit 1; }; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + + + + + + + + + + + + + + + ac_config_headers="$ac_config_headers common/config.h" + + +BUILD_PARSERS=true + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + CC=$ac_ct_CC +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + CC=$ac_ct_CC +else + CC="$ac_cv_prog_CC" +fi + +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. + shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$ac_ct_CC" && break +done + + CC=$ac_ct_CC +fi + +fi + + +test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&5 +echo "$as_me: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } + +# Provide some information about the compiler. 
+echo "$as_me:$LINENO:" \ + "checking for C compiler version" >&5 +ac_compiler=`set X $ac_compile; echo $2` +{ (eval echo "$as_me:$LINENO: \"$ac_compiler --version </dev/null >&5\"") >&5 + (eval $ac_compiler --version </dev/null >&5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -v </dev/null >&5\"") >&5 + (eval $ac_compiler -v </dev/null >&5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -V </dev/null >&5\"") >&5 + (eval $ac_compiler -V </dev/null >&5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +echo "$as_me:$LINENO: checking for C compiler default output file name" >&5 +echo $ECHO_N "checking for C compiler default output file name... $ECHO_C" >&6 +ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` +if { (eval echo "$as_me:$LINENO: \"$ac_link_default\"") >&5 + (eval $ac_link_default) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # Find the output, starting from the most likely. This scheme is +# not robust to junk in `.', hence go to wildcards (a.*) only as a last +# resort. + +# Be careful to initialize this variable, since it used to be cached. +# Otherwise an old cache value of `no' led to `EXEEXT = no' in a Makefile. +ac_cv_exeext= +# b.out is created by i960 compilers. 
+for ac_file in a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) + ;; + conftest.$ac_ext ) + # This is the source file. + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + # FIXME: I believe we export ac_cv_exeext for Libtool, + # but it would be cool to find out if it's true. Does anybody + # maintain Libtool? --akim. + export ac_cv_exeext + break;; + * ) + break;; + esac +done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { echo "$as_me:$LINENO: error: C compiler cannot create executables +See \`config.log' for more details." >&5 +echo "$as_me: error: C compiler cannot create executables +See \`config.log' for more details." >&2;} + { (exit 77); exit 77; }; } +fi + +ac_exeext=$ac_cv_exeext +echo "$as_me:$LINENO: result: $ac_file" >&5 +echo "${ECHO_T}$ac_file" >&6 + +# Check the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +echo "$as_me:$LINENO: checking whether the C compiler works" >&5 +echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6 +# FIXME: These cross compiler hacks should be removed for Autoconf 3.0 +# If not cross compiling, check that we can run a simple program. +if test "$cross_compiling" != yes; then + if { ac_try='./$ac_file' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { echo "$as_me:$LINENO: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." 
>&5 +echo "$as_me: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } + fi + fi +fi +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +rm -f a.out a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +# Check the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +echo "$as_me:$LINENO: checking whether we are cross compiling" >&5 +echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6 +echo "$as_me:$LINENO: result: $cross_compiling" >&5 +echo "${ECHO_T}$cross_compiling" >&6 + +echo "$as_me:$LINENO: checking for suffix of executables" >&5 +echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6 +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + export ac_cv_exeext + break;; + * ) break;; + esac +done +else + { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." 
>&2;} + { (exit 1); exit 1; }; } +fi + +rm -f conftest$ac_cv_exeext +echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5 +echo "${ECHO_T}$ac_cv_exeext" >&6 + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +echo "$as_me:$LINENO: checking for suffix of object files" >&5 +echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6 +if test "${ac_cv_objext+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + for ac_file in `(ls conftest.o conftest.obj; ls conftest.*) 2>/dev/null`; do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +fi + +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_objext" >&5 +echo "${ECHO_T}$ac_cv_objext" >&6 +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 +echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6 +if test "${ac_cv_c_compiler_gnu+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. 
*/ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_compiler_gnu=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_compiler_gnu=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 +echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6 +GCC=`test $ac_compiler_gnu = yes && echo yes` +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +CFLAGS="-g" +echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 +echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6 +if test "${ac_cv_prog_cc_g+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_prog_cc_g=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_g" >&6 +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +echo "$as_me:$LINENO: checking for $CC option to accept ANSI C" >&5 +echo $ECHO_N "checking for $CC option to accept ANSI C... $ECHO_C" >&6 +if test "${ac_cv_prog_cc_stdc+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_prog_cc_stdc=no +ac_save_CC=$CC +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <stdarg.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) 
+{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std1 is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std1. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +# Don't try gcc -ansi; that turns off useful extensions and +# breaks some systems' header files. +# AIX -qlanglvl=ansi +# Ultrix and OSF/1 -std1 +# HP-UX 10.20 and later -Ae +# HP-UX older versions -Aa -D_HPUX_SOURCE +# SVR4 -Xc -D__EXTENSIONS__ +for ac_arg in "" -qlanglvl=ansi -std1 -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_stdc=$ac_arg +break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext +done +rm -f conftest.$ac_ext conftest.$ac_objext +CC=$ac_save_CC + +fi + +case "x$ac_cv_prog_cc_stdc" in + x|xno) + echo "$as_me:$LINENO: result: none needed" >&5 +echo "${ECHO_T}none needed" >&6 ;; + *) + echo "$as_me:$LINENO: result: $ac_cv_prog_cc_stdc" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_stdc" >&6 + CC="$CC $ac_cv_prog_cc_stdc" ;; +esac + +# Some people use a C++ compiler to compile C. Since we use `exit', +# in C++ we need to declare it. In case someone uses the same compiler +# for both compiling C and C++ we need to have the C++ compiler decide +# the declaration of exit, since it's the most demanding environment. +cat >conftest.$ac_ext <<_ACEOF +#ifndef __cplusplus + choke me +#endif +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + for ac_declaration in \ + '' \ + 'extern "C" void std::exit (int) throw (); using std::exit;' \ + 'extern "C" void std::exit (int); using std::exit;' \ + 'extern "C" void exit (int) throw ();' \ + 'extern "C" void exit (int);' \ + 'void exit (int);' +do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. 
*/ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_declaration +#include <stdlib.h> +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +continue +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_declaration +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +done +rm -f conftest* +if test -n "$ac_declaration"; then + echo '#ifdef __cplusplus' >>confdefs.h + echo $ac_declaration >>confdefs.h + echo '#endif' >>confdefs.h +fi + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +cat >>confdefs.h <<_ACEOF +#define CC $CC +_ACEOF + + +ac_ext=cc +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu +if test -n "$ac_tool_prefix"; then + for ac_prog in $CCC g++ c++ gpp aCC CC cxx cc++ cl FCC KCC RCC xlC_r xlC + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CXX+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CXX"; then + ac_cv_prog_CXX="$CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CXX="$ac_tool_prefix$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CXX=$ac_cv_prog_CXX +if test -n "$CXX"; then + echo "$as_me:$LINENO: result: $CXX" >&5 +echo "${ECHO_T}$CXX" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$CXX" && break + done +fi +if test -z "$CXX"; then + ac_ct_CXX=$CXX + for ac_prog in $CCC g++ c++ gpp aCC CC cxx cc++ cl FCC KCC RCC xlC_r xlC +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CXX+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CXX"; then + ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CXX="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CXX=$ac_cv_prog_ac_ct_CXX +if test -n "$ac_ct_CXX"; then + echo "$as_me:$LINENO: result: $ac_ct_CXX" >&5 +echo "${ECHO_T}$ac_ct_CXX" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$ac_ct_CXX" && break +done +test -n "$ac_ct_CXX" || ac_ct_CXX="g++" + + CXX=$ac_ct_CXX +fi + + +# Provide some information about the compiler. 
+echo "$as_me:$LINENO:" \ + "checking for C++ compiler version" >&5 +ac_compiler=`set X $ac_compile; echo $2` +{ (eval echo "$as_me:$LINENO: \"$ac_compiler --version </dev/null >&5\"") >&5 + (eval $ac_compiler --version </dev/null >&5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -v </dev/null >&5\"") >&5 + (eval $ac_compiler -v </dev/null >&5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -V </dev/null >&5\"") >&5 + (eval $ac_compiler -V </dev/null >&5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + +echo "$as_me:$LINENO: checking whether we are using the GNU C++ compiler" >&5 +echo $ECHO_N "checking whether we are using the GNU C++ compiler... $ECHO_C" >&6 +if test "${ac_cv_cxx_compiler_gnu+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_compiler_gnu=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_compiler_gnu=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_cxx_compiler_gnu=$ac_compiler_gnu + +fi +echo "$as_me:$LINENO: result: $ac_cv_cxx_compiler_gnu" >&5 +echo "${ECHO_T}$ac_cv_cxx_compiler_gnu" >&6 +GXX=`test $ac_compiler_gnu = yes && echo yes` +ac_test_CXXFLAGS=${CXXFLAGS+set} +ac_save_CXXFLAGS=$CXXFLAGS +CXXFLAGS="-g" +echo "$as_me:$LINENO: checking whether $CXX accepts -g" >&5 +echo $ECHO_N "checking whether $CXX accepts -g... $ECHO_C" >&6 +if test "${ac_cv_prog_cxx_g+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cxx_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_prog_cxx_g=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_prog_cxx_g" >&5 +echo "${ECHO_T}$ac_cv_prog_cxx_g" >&6 +if test "$ac_test_CXXFLAGS" = set; then + CXXFLAGS=$ac_save_CXXFLAGS +elif test $ac_cv_prog_cxx_g = yes; then + if test "$GXX" = yes; then + CXXFLAGS="-g -O2" + else + CXXFLAGS="-g" + fi +else + if test "$GXX" = yes; then + CXXFLAGS="-O2" + else + CXXFLAGS= + fi +fi +for ac_declaration in \ + '' \ + 'extern "C" void std::exit (int) throw (); using std::exit;' \ + 'extern "C" void std::exit (int); using std::exit;' \ + 'extern "C" void exit (int) throw ();' \ + 'extern "C" void exit (int);' \ + 'void exit (int);' +do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_declaration +#include <stdlib.h> +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +continue +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_declaration +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +done +rm -f conftest* +if test -n "$ac_declaration"; then + echo '#ifdef __cplusplus' >>confdefs.h + echo $ac_declaration >>confdefs.h + echo '#endif' >>confdefs.h +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +cat >>confdefs.h <<_ACEOF +#define CXX $CXX +_ACEOF + + +ac_ext=cc +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + +echo "$as_me:$LINENO: checking whether ${MAKE-make} sets \$(MAKE)" >&5 +echo $ECHO_N "checking whether ${MAKE-make} sets \$(MAKE)... $ECHO_C" >&6 +set dummy ${MAKE-make}; ac_make=`echo "$2" | sed 'y,:./+-,___p_,'` +if eval "test \"\${ac_cv_prog_make_${ac_make}_set+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.make <<\_ACEOF +all: + @echo 'ac_maketemp="$(MAKE)"' +_ACEOF +# GNU make sometimes prints "make[1]: Entering...", which would confuse us. +eval `${MAKE-make} -f conftest.make 2>/dev/null | grep temp=` +if test -n "$ac_maketemp"; then + eval ac_cv_prog_make_${ac_make}_set=yes +else + eval ac_cv_prog_make_${ac_make}_set=no +fi +rm -f conftest.make +fi +if eval "test \"`echo '$ac_cv_prog_make_'${ac_make}_set`\" = yes"; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + SET_MAKE= +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + SET_MAKE="MAKE=${MAKE-make}" +fi + + +if test $BUILD_PARSERS = true; then + +# Extract the first word of "flex", so it can be a program name with args. 
+set dummy flex; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_FLEX+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$FLEX"; then + ac_cv_prog_FLEX="$FLEX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_FLEX="flex" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +FLEX=$ac_cv_prog_FLEX +if test -n "$FLEX"; then + echo "$as_me:$LINENO: result: $FLEX" >&5 +echo "${ECHO_T}$FLEX" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +if test -z "$FLEX"; then + echo + echo "error: flex is required to compile ragel" + echo + exit 1 +fi + +# Extract the first word of "gperf", so it can be a program name with args. +set dummy gperf; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_GPERF+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$GPERF"; then + ac_cv_prog_GPERF="$GPERF" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_GPERF="gperf" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +GPERF=$ac_cv_prog_GPERF +if test -n "$GPERF"; then + echo "$as_me:$LINENO: result: $GPERF" >&5 +echo "${ECHO_T}$GPERF" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +if test -z "$GPERF"; then + echo + echo "error: gperf is required to compile ragel" + echo + exit 1 +fi + +# Extract the first word of "bison", so it can be a program name with args. +set dummy bison; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_BISON+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$BISON"; then + ac_cv_prog_BISON="$BISON" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_BISON="bison" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +BISON=$ac_cv_prog_BISON +if test -n "$BISON"; then + echo "$as_me:$LINENO: result: $BISON" >&5 +echo "${ECHO_T}$BISON" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +if test -z "$BISON"; then + echo + echo "error: bison is required to compile ragel" + echo + exit 1 +fi + +if "$BISON" --version | grep 'bison++'; then + echo + echo "error: sorry, ragel cannot be compiled with bison++" + echo + exit 1 +fi + +fi # BUILD_PARSERS + +# Extract the first word of "gdc", so it can be a program name with args. +set dummy gdc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6 +if test "${ac_cv_prog_GDC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$GDC"; then + ac_cv_prog_GDC="$GDC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_GDC="gdc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +GDC=$ac_cv_prog_GDC +if test -n "$GDC"; then + echo "$as_me:$LINENO: result: $GDC" >&5 +echo "${ECHO_T}$GDC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +if test -n "$GDC"; then + cat >>confdefs.h <<_ACEOF +#define GDC $GDC +_ACEOF + +fi + +echo "$as_me:$LINENO: checking for the Objective-C compiler" >&5 +echo $ECHO_N "checking for the Objective-C compiler... $ECHO_C" >&6 +cat > conftest.m <<EOF +int main() { return 0; } +EOF +GOBJC="" +if gcc -x objective-c conftest.m -o conftest.bin 2>/dev/null; then + GOBJC="gcc -x objective-c" + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + cat >>confdefs.h <<_ACEOF +#define GOBJC $GOBJC +_ACEOF + +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + +# Extract the first word of "javac", so it can be a program name with args. +set dummy javac; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_JAVAC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$JAVAC"; then + ac_cv_prog_JAVAC="$JAVAC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_JAVAC="javac" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +JAVAC=$ac_cv_prog_JAVAC +if test -n "$JAVAC"; then + echo "$as_me:$LINENO: result: $JAVAC" >&5 +echo "${ECHO_T}$JAVAC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +if test -n "$JAVAC"; then + cat >>confdefs.h <<_ACEOF +#define JAVAC $JAVAC +_ACEOF + +fi + +# Extract the first word of "txl", so it can be a program name with args. +set dummy txl; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_TXL+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$TXL"; then + ac_cv_prog_TXL="$TXL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_TXL="txl" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +TXL=$ac_cv_prog_TXL +if test -n "$TXL"; then + echo "$as_me:$LINENO: result: $TXL" >&5 +echo "${ECHO_T}$TXL" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +if test -n "$TXL"; then + cat >>confdefs.h <<_ACEOF +#define TXL $TXL +_ACEOF + +fi + + ac_config_files="$ac_config_files Makefile common/Makefile ragel/Makefile rlcodegen/Makefile doc/Makefile test/Makefile" +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. 
If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, don't put newlines in cache variables' values. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +{ + (set) 2>&1 | + case `(ac_space=' '; set | grep ac_space) 2>&1` in + *ac_space=\ *) + # `set' does not quote correctly, so add quotes (double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \). + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n \ + "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p" + ;; + esac; +} | + sed ' + t clear + : clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + /^ac_cv_env/!s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + : end' >>confcache +if diff $cache_file confcache >/dev/null 2>&1; then :; else + if test -w $cache_file; then + test "x$cache_file" != "x/dev/null" && echo "updating cache $cache_file" + cat confcache >$cache_file + else + echo "not updating unwritable cache $cache_file" + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. 
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +# VPATH may cause trouble with some makes, so we remove $(srcdir), +# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=/{ +s/:*\$(srcdir):*/:/; +s/:*\${srcdir}:*/:/; +s/:*@srcdir@:*/:/; +s/^\([^=]*=[ ]*\):*/\1/; +s/:*$//; +s/^[^=]*=[ ]*$//; +}' +fi + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_i=`echo "$ac_i" | + sed 's/\$U\././;s/\.o$//;s/\.obj$//'` + # 2. Add them. + ac_libobjs="$ac_libobjs $ac_i\$U.$ac_objext" + ac_ltlibobjs="$ac_ltlibobjs $ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + + +: ${CONFIG_STATUS=./config.status} +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5 +echo "$as_me: creating $CONFIG_STATUS" >&6;} +cat >$CONFIG_STATUS <<_ACEOF +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false +SHELL=\${CONFIG_SHELL-$SHELL} +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +## --------------------- ## +## M4sh Initialization. ## +## --------------------- ## + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '${1+"$@"}'='"$@"' +elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then + set -o posix +fi +DUALCASE=1; export DUALCASE # for MKS sh + +# Support unset when possible. +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# Work around bugs in pre-3.0 UWIN ksh. +$as_unset ENV MAIL MAILPATH +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +for as_var in \ + LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ + LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ + LC_TELEPHONE LC_TIME +do + if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then + eval $as_var=C; export $as_var + else + $as_unset $as_var + fi +done + +# Required to use basename. +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)$' \| \ + . : '\(.\)' 2>/dev/null || +echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; } + /^X\/\(\/\/\)$/{ s//\1/; q; } + /^X\/\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + + +# PATH needs CR, and LINENO needs CR and PATH. +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! 
/bin/sh" >conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh +fi + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" || { + # Find who we are. Look in the path if we contain no path at all + # relative or not. + case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done + + ;; + esac + # We did not find ourselves, most probably we were run as `sh COMMAND' + # in which case we are not to be found in the path. + if test "x$as_myself" = x; then + as_myself=$0 + fi + if test ! -f "$as_myself"; then + { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5 +echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;} + { (exit 1); exit 1; }; } + fi + case $CONFIG_SHELL in + '') + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for as_base in sh bash ksh sh5; do + case $as_dir in + /*) + if ("$as_dir/$as_base" -c ' + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then + $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; } + $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; } + CONFIG_SHELL=$as_dir/$as_base + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$0" ${1+"$@"} + fi;; + esac + done +done +;; + esac + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line before each line; the second 'sed' does the real + # work. The second script uses 'N' to pair each line-number line + # with the numbered line, and appends trailing '-' during + # substitution so that $LINENO is not a special case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-) + sed '=' <$as_myself | + sed ' + N + s,$,-, + : loop + s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3, + t loop + s,-$,, + s,^['$as_cr_digits']*\n,, + ' >$as_me.lineno && + chmod +x $as_me.lineno || + { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5 +echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;} + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensible to this). + . ./$as_me.lineno + # Exit status is that of the last command. 
+ exit +} + + +case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in + *c*,-n*) ECHO_N= ECHO_C=' +' ECHO_T=' ' ;; + *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; + *) ECHO_N= ECHO_C='\c' ECHO_T= ;; +esac + +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +echo >conf$$.file +if ln -s conf$$.file conf$$ 2>/dev/null; then + # We could just check for DJGPP; but this test a) works b) is more generic + # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04). + if test -f conf$$.exe; then + # Don't use ln at all; we don't have any links + as_ln_s='cp -p' + else + as_ln_s='ln -s' + fi +elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.file + +if mkdir -p . 2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_executable_p="test -f" + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +# IFS +# We need space, tab and new line, in precisely that order. +as_nl=' +' +IFS=" $as_nl" + +# CDPATH. +$as_unset CDPATH + +exec 6>&1 + +# Open the log real soon, to keep \$[0] and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. Logging --version etc. is OK. +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX +} >&5 +cat >&5 <<_CSEOF + +This file was extended by $as_me, which was +generated by GNU Autoconf 2.59. 
Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +_CSEOF +echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5 +echo >&5 +_ACEOF + +# Files that config.status was made for. +if test -n "$ac_config_files"; then + echo "config_files=\"$ac_config_files\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_headers"; then + echo "config_headers=\"$ac_config_headers\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_links"; then + echo "config_links=\"$ac_config_links\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_commands"; then + echo "config_commands=\"$ac_config_commands\"" >>$CONFIG_STATUS +fi + +cat >>$CONFIG_STATUS <<\_ACEOF + +ac_cs_usage="\ +\`$as_me' instantiates files from templates according to the +current configuration. + +Usage: $0 [OPTIONS] [FILE]... + + -h, --help print this help, then exit + -V, --version print version number, then exit + -q, --quiet do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Report bugs to <bug-autoconf@gnu.org>." +_ACEOF + +cat >>$CONFIG_STATUS <<_ACEOF +ac_cs_version="\\ +config.status +configured by $0, generated by GNU Autoconf 2.59, + with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\" + +Copyright (C) 2003 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." +srcdir=$srcdir +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +# If no file are specified by the user, then we need to provide default +# value. 
By we need to know if files were specified by the user. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=*) + ac_option=`expr "x$1" : 'x\([^=]*\)='` + ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'` + ac_shift=: + ;; + -*) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + *) # This is not an option, so the user has probably given explicit + # arguments. + ac_option=$1 + ac_need_defaults=false;; + esac + + case $ac_option in + # Handling of the options. +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --vers* | -V ) + echo "$ac_cs_version"; exit 0 ;; + --he | --h) + # Conflict between --help and --header + { { echo "$as_me:$LINENO: error: ambiguous option: $1 +Try \`$0 --help' for more information." >&5 +echo "$as_me: error: ambiguous option: $1 +Try \`$0 --help' for more information." >&2;} + { (exit 1); exit 1; }; };; + --help | --hel | -h ) + echo "$ac_cs_usage"; exit 0 ;; + --debug | --d* | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + CONFIG_FILES="$CONFIG_FILES $ac_optarg" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg" + ac_need_defaults=false;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1 +Try \`$0 --help' for more information." >&5 +echo "$as_me: error: unrecognized option: $1 +Try \`$0 --help' for more information." 
>&2;} + { (exit 1); exit 1; }; } ;; + + *) ac_config_targets="$ac_config_targets $1" ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF +if \$ac_cs_recheck; then + echo "running $SHELL $0 " $ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6 + exec $SHELL $0 $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion +fi + +_ACEOF + + + + + +cat >>$CONFIG_STATUS <<\_ACEOF +for ac_config_target in $ac_config_targets +do + case "$ac_config_target" in + # Handling of arguments. + "Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;; + "common/Makefile" ) CONFIG_FILES="$CONFIG_FILES common/Makefile" ;; + "ragel/Makefile" ) CONFIG_FILES="$CONFIG_FILES ragel/Makefile" ;; + "rlcodegen/Makefile" ) CONFIG_FILES="$CONFIG_FILES rlcodegen/Makefile" ;; + "doc/Makefile" ) CONFIG_FILES="$CONFIG_FILES doc/Makefile" ;; + "test/Makefile" ) CONFIG_FILES="$CONFIG_FILES test/Makefile" ;; + "common/config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS common/config.h" ;; + *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 +echo "$as_me: error: invalid argument: $ac_config_target" >&2;} + { (exit 1); exit 1; }; };; + esac +done + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason to put it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. 
+# Create a temporary directory, and hook for its removal unless debugging. +$debug || +{ + trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0 + trap '{ (exit 1); exit 1; }' 1 2 13 15 +} + +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d -q "./confstatXXXXXX") 2>/dev/null` && + test -n "$tmp" && test -d "$tmp" +} || +{ + tmp=./confstat$$-$RANDOM + (umask 077 && mkdir $tmp) +} || +{ + echo "$me: cannot create a temporary directory in ." >&2 + { (exit 1); exit 1; } +} + +_ACEOF + +cat >>$CONFIG_STATUS <<_ACEOF + +# +# CONFIG_FILES section. +# + +# No need to generate the scripts if there are no CONFIG_FILES. +# This happens for instance when ./config.status config.h +if test -n "\$CONFIG_FILES"; then + # Protect against being on the right side of a sed subst in config.status. + sed 's/,@/@@/; s/@,/@@/; s/,;t t\$/@;t t/; /@;t t\$/s/[\\\\&,]/\\\\&/g; + s/@@/,@/; s/@@/@,/; s/@;t t\$/,;t t/' >\$tmp/subs.sed <<\\CEOF +s,@SHELL@,$SHELL,;t t +s,@PATH_SEPARATOR@,$PATH_SEPARATOR,;t t +s,@PACKAGE_NAME@,$PACKAGE_NAME,;t t +s,@PACKAGE_TARNAME@,$PACKAGE_TARNAME,;t t +s,@PACKAGE_VERSION@,$PACKAGE_VERSION,;t t +s,@PACKAGE_STRING@,$PACKAGE_STRING,;t t +s,@PACKAGE_BUGREPORT@,$PACKAGE_BUGREPORT,;t t +s,@exec_prefix@,$exec_prefix,;t t +s,@prefix@,$prefix,;t t +s,@program_transform_name@,$program_transform_name,;t t +s,@bindir@,$bindir,;t t +s,@sbindir@,$sbindir,;t t +s,@libexecdir@,$libexecdir,;t t +s,@datadir@,$datadir,;t t +s,@sysconfdir@,$sysconfdir,;t t +s,@sharedstatedir@,$sharedstatedir,;t t +s,@localstatedir@,$localstatedir,;t t +s,@libdir@,$libdir,;t t +s,@includedir@,$includedir,;t t +s,@oldincludedir@,$oldincludedir,;t t +s,@infodir@,$infodir,;t t +s,@mandir@,$mandir,;t t +s,@build_alias@,$build_alias,;t t +s,@host_alias@,$host_alias,;t t +s,@target_alias@,$target_alias,;t t +s,@DEFS@,$DEFS,;t t +s,@ECHO_C@,$ECHO_C,;t t +s,@ECHO_N@,$ECHO_N,;t t +s,@ECHO_T@,$ECHO_T,;t t +s,@LIBS@,$LIBS,;t t +s,@BUILD_PARSERS@,$BUILD_PARSERS,;t t 
+s,@CC@,$CC,;t t +s,@CFLAGS@,$CFLAGS,;t t +s,@LDFLAGS@,$LDFLAGS,;t t +s,@CPPFLAGS@,$CPPFLAGS,;t t +s,@ac_ct_CC@,$ac_ct_CC,;t t +s,@EXEEXT@,$EXEEXT,;t t +s,@OBJEXT@,$OBJEXT,;t t +s,@CXX@,$CXX,;t t +s,@CXXFLAGS@,$CXXFLAGS,;t t +s,@ac_ct_CXX@,$ac_ct_CXX,;t t +s,@SET_MAKE@,$SET_MAKE,;t t +s,@FLEX@,$FLEX,;t t +s,@GPERF@,$GPERF,;t t +s,@BISON@,$BISON,;t t +s,@GDC@,$GDC,;t t +s,@GOBJC@,$GOBJC,;t t +s,@JAVAC@,$JAVAC,;t t +s,@TXL@,$TXL,;t t +s,@LIBOBJS@,$LIBOBJS,;t t +s,@LTLIBOBJS@,$LTLIBOBJS,;t t +CEOF + +_ACEOF + + cat >>$CONFIG_STATUS <<\_ACEOF + # Split the substitutions into bite-sized pieces for seds with + # small command number limits, like on Digital OSF/1 and HP-UX. + ac_max_sed_lines=48 + ac_sed_frag=1 # Number of current file. + ac_beg=1 # First line for current file. + ac_end=$ac_max_sed_lines # Line after last line for current file. + ac_more_lines=: + ac_sed_cmds= + while $ac_more_lines; do + if test $ac_beg -gt 1; then + sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag + else + sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag + fi + if test ! -s $tmp/subs.frag; then + ac_more_lines=false + else + # The purpose of the label and of the branching condition is to + # speed up the sed processing (if there are no `@' at all, there + # is no need to browse any of the substitutions). + # These are the two extra sed commands mentioned above. 
+ (echo ':t + /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed + if test -z "$ac_sed_cmds"; then + ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed" + else + ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed" + fi + ac_sed_frag=`expr $ac_sed_frag + 1` + ac_beg=$ac_end + ac_end=`expr $ac_end + $ac_max_sed_lines` + fi + done + if test -z "$ac_sed_cmds"; then + ac_sed_cmds=cat + fi +fi # test -n "$CONFIG_FILES" + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF +for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue + # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". + case $ac_file in + - | *:- | *:-:* ) # input from stdin + cat >$tmp/stdin + ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + * ) ac_file_in=$ac_file.in ;; + esac + + # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories. + ac_dir=`(dirname "$ac_file") 2>/dev/null || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + { if $as_mkdir_p; then + mkdir -p "$ac_dir" + else + as_dir="$ac_dir" + as_dirs= + while test ! -d "$as_dir"; do + as_dirs="$as_dir $as_dirs" + as_dir=`(dirname "$as_dir") 2>/dev/null || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| \ + . 
: '\(.\)' 2>/dev/null || +echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + done + test ! -n "$as_dirs" || mkdir $as_dirs + fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5 +echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;} + { (exit 1); exit 1; }; }; } + + ac_builddir=. + +if test "$ac_dir" != .; then + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A "../" for each directory in $ac_dir_suffix. + ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` +else + ac_dir_suffix= ac_top_builddir= +fi + +case $srcdir in + .) # No --srcdir option. We are building in place. + ac_srcdir=. + if test -z "$ac_top_builddir"; then + ac_top_srcdir=. + else + ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` + fi ;; + [\\/]* | ?:[\\/]* ) # Absolute path. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir ;; + *) # Relative path. + ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_builddir$srcdir ;; +esac + +# Do not use `cd foo && pwd` to compute absolute paths, because +# the directories may not exist. +case `pwd` in +.) ac_abs_builddir="$ac_dir";; +*) + case "$ac_dir" in + .) ac_abs_builddir=`pwd`;; + [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";; + *) ac_abs_builddir=`pwd`/"$ac_dir";; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_builddir=${ac_top_builddir}.;; +*) + case ${ac_top_builddir}. in + .) ac_abs_top_builddir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;; + *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_srcdir=$ac_srcdir;; +*) + case $ac_srcdir in + .) ac_abs_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;; + *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;; + esac;; +esac +case $ac_abs_builddir in +.) 
ac_abs_top_srcdir=$ac_top_srcdir;; +*) + case $ac_top_srcdir in + .) ac_abs_top_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;; + *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;; + esac;; +esac + + + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + if test x"$ac_file" = x-; then + configure_input= + else + configure_input="$ac_file. " + fi + configure_input=$configure_input"Generated from `echo $ac_file_in | + sed 's,.*/,,'` by configure." + + # First look for the input files in the build tree, otherwise in the + # src tree. + ac_file_inputs=`IFS=: + for f in $ac_file_in; do + case $f in + -) echo $tmp/stdin ;; + [\\/$]*) + # Absolute (can't be DOS-style, as IFS=:) + test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + echo "$f";; + *) # Relative + if test -f "$f"; then + # Build tree + echo "$f" + elif test -f "$srcdir/$f"; then + # Source tree + echo "$srcdir/$f" + else + # /dev/null tree + { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + fi;; + esac + done` || { (exit 1); exit 1; } + + if test x"$ac_file" != x-; then + { echo "$as_me:$LINENO: creating $ac_file" >&5 +echo "$as_me: creating $ac_file" >&6;} + rm -f "$ac_file" + fi +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF + sed "$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s,@configure_input@,$configure_input,;t t +s,@srcdir@,$ac_srcdir,;t t +s,@abs_srcdir@,$ac_abs_srcdir,;t t +s,@top_srcdir@,$ac_top_srcdir,;t t +s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t +s,@builddir@,$ac_builddir,;t t +s,@abs_builddir@,$ac_abs_builddir,;t t +s,@top_builddir@,$ac_top_builddir,;t t 
+s,@abs_top_builddir@,$ac_abs_top_builddir,;t t +" $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out + rm -f $tmp/stdin + if test x"$ac_file" != x-; then + mv $tmp/out $ac_file + else + cat $tmp/out + rm -f $tmp/out + fi + +done +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF + +# +# CONFIG_HEADER section. +# + +# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where +# NAME is the cpp macro being defined and VALUE is the value it is being given. +# +# ac_d sets the value in "#define NAME VALUE" lines. +ac_dA='s,^\([ ]*\)#\([ ]*define[ ][ ]*\)' +ac_dB='[ ].*$,\1#\2' +ac_dC=' ' +ac_dD=',;t' +# ac_u turns "#undef NAME" without trailing blanks into "#define NAME VALUE". +ac_uA='s,^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' +ac_uB='$,\1#\2define\3' +ac_uC=' ' +ac_uD=',;t' + +for ac_file in : $CONFIG_HEADERS; do test "x$ac_file" = x: && continue + # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". + case $ac_file in + - | *:- | *:-:* ) # input from stdin + cat >$tmp/stdin + ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + * ) ac_file_in=$ac_file.in ;; + esac + + test x"$ac_file" != x- && { echo "$as_me:$LINENO: creating $ac_file" >&5 +echo "$as_me: creating $ac_file" >&6;} + + # First look for the input files in the build tree, otherwise in the + # src tree. + ac_file_inputs=`IFS=: + for f in $ac_file_in; do + case $f in + -) echo $tmp/stdin ;; + [\\/$]*) + # Absolute (can't be DOS-style, as IFS=:) + test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + # Do quote $f, to prevent DOS paths from being IFS'd. 
+ echo "$f";; + *) # Relative + if test -f "$f"; then + # Build tree + echo "$f" + elif test -f "$srcdir/$f"; then + # Source tree + echo "$srcdir/$f" + else + # /dev/null tree + { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + fi;; + esac + done` || { (exit 1); exit 1; } + # Remove the trailing spaces. + sed 's/[ ]*$//' $ac_file_inputs >$tmp/in + +_ACEOF + +# Transform confdefs.h into two sed scripts, `conftest.defines' and +# `conftest.undefs', that substitutes the proper values into +# config.h.in to produce config.h. The first handles `#define' +# templates, and the second `#undef' templates. +# And first: Protect against being on the right side of a sed subst in +# config.status. Protect against being in an unquoted here document +# in config.status. +rm -f conftest.defines conftest.undefs +# Using a here document instead of a string reduces the quoting nightmare. +# Putting comments in sed scripts is not portable. +# +# `end' is used to avoid that the second main sed command (meant for +# 0-ary CPP macros) applies to n-ary macro definitions. +# See the Autoconf documentation for `clear'. +cat >confdef2sed.sed <<\_ACEOF +s/[\\&,]/\\&/g +s,[\\$`],\\&,g +t clear +: clear +s,^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*\)\(([^)]*)\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1\2${ac_dC}\3${ac_dD},gp +t end +s,^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD},gp +: end +_ACEOF +# If some macros were called several times there might be several times +# the same #defines, which is useless. Nevertheless, we may not want to +# sort them, since we want the *last* AC-DEFINE to be honored. +uniq confdefs.h | sed -n -f confdef2sed.sed >conftest.defines +sed 's/ac_d/ac_u/g' conftest.defines >conftest.undefs +rm -f confdef2sed.sed + +# This sed command replaces #undef with comments. 
This is necessary, for +# example, in the case of _POSIX_SOURCE, which is predefined and required +# on some systems where configure will not decide to define it. +cat >>conftest.undefs <<\_ACEOF +s,^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*,/* & */, +_ACEOF + +# Break up conftest.defines because some shells have a limit on the size +# of here documents, and old seds have small limits too (100 cmds). +echo ' # Handle all the #define templates only if necessary.' >>$CONFIG_STATUS +echo ' if grep "^[ ]*#[ ]*define" $tmp/in >/dev/null; then' >>$CONFIG_STATUS +echo ' # If there are no defines, we may have an empty if/fi' >>$CONFIG_STATUS +echo ' :' >>$CONFIG_STATUS +rm -f conftest.tail +while grep . conftest.defines >/dev/null +do + # Write a limited-size here document to $tmp/defines.sed. + echo ' cat >$tmp/defines.sed <<CEOF' >>$CONFIG_STATUS + # Speed up: don't consider the non `#define' lines. + echo '/^[ ]*#[ ]*define/!b' >>$CONFIG_STATUS + # Work around the forget-to-reset-the-flag bug. + echo 't clr' >>$CONFIG_STATUS + echo ': clr' >>$CONFIG_STATUS + sed ${ac_max_here_lines}q conftest.defines >>$CONFIG_STATUS + echo 'CEOF + sed -f $tmp/defines.sed $tmp/in >$tmp/out + rm -f $tmp/in + mv $tmp/out $tmp/in +' >>$CONFIG_STATUS + sed 1,${ac_max_here_lines}d conftest.defines >conftest.tail + rm -f conftest.defines + mv conftest.tail conftest.defines +done +rm -f conftest.defines +echo ' fi # grep' >>$CONFIG_STATUS +echo >>$CONFIG_STATUS + +# Break up conftest.undefs because some shells have a limit on the size +# of here documents, and old seds have small limits too (100 cmds). +echo ' # Handle all the #undef templates' >>$CONFIG_STATUS +rm -f conftest.tail +while grep . conftest.undefs >/dev/null +do + # Write a limited-size here document to $tmp/undefs.sed. + echo ' cat >$tmp/undefs.sed <<CEOF' >>$CONFIG_STATUS + # Speed up: don't consider the non `#undef' + echo '/^[ ]*#[ ]*undef/!b' >>$CONFIG_STATUS + # Work around the forget-to-reset-the-flag bug. 
+ echo 't clr' >>$CONFIG_STATUS + echo ': clr' >>$CONFIG_STATUS + sed ${ac_max_here_lines}q conftest.undefs >>$CONFIG_STATUS + echo 'CEOF + sed -f $tmp/undefs.sed $tmp/in >$tmp/out + rm -f $tmp/in + mv $tmp/out $tmp/in +' >>$CONFIG_STATUS + sed 1,${ac_max_here_lines}d conftest.undefs >conftest.tail + rm -f conftest.undefs + mv conftest.tail conftest.undefs +done +rm -f conftest.undefs + +cat >>$CONFIG_STATUS <<\_ACEOF + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + if test x"$ac_file" = x-; then + echo "/* Generated by configure. */" >$tmp/config.h + else + echo "/* $ac_file. Generated by configure. */" >$tmp/config.h + fi + cat $tmp/in >>$tmp/config.h + rm -f $tmp/in + if test x"$ac_file" != x-; then + if diff $ac_file $tmp/config.h >/dev/null 2>&1; then + { echo "$as_me:$LINENO: $ac_file is unchanged" >&5 +echo "$as_me: $ac_file is unchanged" >&6;} + else + ac_dir=`(dirname "$ac_file") 2>/dev/null || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + { if $as_mkdir_p; then + mkdir -p "$ac_dir" + else + as_dir="$ac_dir" + as_dirs= + while test ! -d "$as_dir"; do + as_dirs="$as_dir $as_dirs" + as_dir=`(dirname "$as_dir") 2>/dev/null || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + done + test ! 
-n "$as_dirs" || mkdir $as_dirs + fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5 +echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;} + { (exit 1); exit 1; }; }; } + + rm -f $ac_file + mv $tmp/config.h $ac_file + fi + else + cat $tmp/config.h + rm -f $tmp/config.h + fi +done +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF + +{ (exit 0); exit 0; } +_ACEOF +chmod +x $CONFIG_STATUS +ac_clean_files=$ac_clean_files_save + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || { (exit 1); exit 1; } +fi + + +echo "configuration of ragel complete" diff --git a/configure.in b/configure.in new file mode 100644 index 0000000..a8e55e6 --- /dev/null +++ b/configure.in @@ -0,0 +1,118 @@ +dnl +dnl Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> +dnl + +dnl This file is part of Ragel. +dnl +dnl Ragel is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU General Public License as published by +dnl the Free Software Foundation; either version 2 of the License, or +dnl (at your option) any later version. 
+dnl +dnl Ragel is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +dnl GNU General Public License for more details. +dnl +dnl You should have received a copy of the GNU General Public License +dnl along with Ragel; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +AC_INIT(ragel/main.cpp) +AC_CONFIG_HEADER(common/config.h) + +dnl Set true if build system should generate parsers from flex, bison, and +dnl gperf sources. Set false if generated files are included and not to be +dnl built (production). +AC_SUBST(BUILD_PARSERS,true) + +dnl Checks for programs. +AC_PROG_CC +AC_DEFINE_UNQUOTED(CC,$CC) + +dnl Checks for programs. +AC_PROG_CXX +AC_DEFINE_UNQUOTED(CXX,$CXX) + +dnl Set test on c++ compiler. +AC_LANG_CPLUSPLUS + +dnl Check for definition of MAKE. +AC_PROG_MAKE_SET + +if test $BUILD_PARSERS = true; then + +dnl Check for flex +AC_CHECK_PROG(FLEX, flex, flex) +if test -z "$FLEX"; then + echo + echo "error: flex is required to compile ragel" + echo + exit 1 +fi + +dnl Check for gperf +AC_CHECK_PROG(GPERF, gperf, gperf) +if test -z "$GPERF"; then + echo + echo "error: gperf is required to compile ragel" + echo + exit 1 +fi + +dnl Check for bison +AC_CHECK_PROG(BISON, bison, bison) +if test -z "$BISON"; then + echo + echo "error: bison is required to compile ragel" + echo + exit 1 +fi + +dnl Sorry, Ragel will not compile with bison++. 
+if "$BISON" --version | grep 'bison++'; then + echo + echo "error: sorry, ragel cannot be compiled with bison++" + echo + exit 1 +fi + +fi # BUILD_PARSERS + +dnl Check for the D compiler +AC_CHECK_PROG(GDC, gdc, gdc) +if test -n "$GDC"; then + AC_DEFINE_UNQUOTED(GDC,$GDC) +fi + +dnl Check for the Objective-C compiler +AC_MSG_CHECKING([for the Objective-C compiler]) +cat > conftest.m <<EOF +int main() { return 0; } +EOF +GOBJC="" +if gcc -x objective-c conftest.m -o conftest.bin 2>/dev/null; then + GOBJC="gcc -x objective-c" + AC_MSG_RESULT([yes]) + AC_DEFINE_UNQUOTED(GOBJC,$GOBJC) +else + AC_MSG_RESULT([no]) +fi +AC_SUBST(GOBJC) + +dnl Check for the Java compiler. +AC_CHECK_PROG(JAVAC, javac, javac) +if test -n "$JAVAC"; then + AC_DEFINE_UNQUOTED(JAVAC,$JAVAC) +fi + +dnl Check for TXL. +AC_CHECK_PROG(TXL, txl, txl) +if test -n "$TXL"; then + AC_DEFINE_UNQUOTED(TXL,$TXL) +fi + +dnl write output files +AC_OUTPUT(Makefile common/Makefile ragel/Makefile rlcodegen/Makefile doc/Makefile test/Makefile) + +echo "configuration of ragel complete" diff --git a/doc/Makefile.in b/doc/Makefile.in new file mode 100644 index 0000000..f52e021 --- /dev/null +++ b/doc/Makefile.in @@ -0,0 +1,73 @@ +# +# Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> +# + +# This file is part of Ragel. +# +# Ragel is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Ragel is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with Ragel; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +INPUT = version.tex ragel-guide.tex + +# Pick up all the figures in the current dir. +FIGURES = $(wildcard *.fig) +PDFFIGS = $(FIGURES:%.fig=%.pdf) + +# Get the version info. +include ../version.mk + +# Install prefix. +PREFIX = @prefix@ + +# Rules. +all: ragel-guide.pdf ragel.1 rlcodegen.1 + +ragel-guide.pdf: $(PDFFIGS) $(INPUT) + +%.pdf: %.fig + fig2dev -L pdf $< $@ + +%.pdf: %.tex + pdflatex -interaction=nonstopmode $< >/dev/null + pdflatex -interaction=nonstopmode $< >/dev/null + pdflatex -interaction=nonstopmode $< >/dev/null + +version.tex: ../version.mk + echo '\def\version{$(VERSION)}' > version.tex + echo '\def\pubdate{$(PUBDATE)}' >> version.tex + +ragel.1: ragel.1.in ../version.mk + cat ragel.1.in | sed 's/@PUBDATE@/$(PUBDATE)/' \ + | sed 's/@VERSION@/$(VERSION)/' > ragel.1 + +rlcodegen.1: rlcodegen.1.in ../version.mk + cat rlcodegen.1.in | sed 's/@PUBDATE@/$(PUBDATE)/' \ + | sed 's/@VERSION@/$(VERSION)/' > rlcodegen.1 + +clean: + rm -f ragel.1 rlcodegen.1 \ + *.bak *.aux *.dvi *.log *.toc *.pdf + +distclean: clean + rm -f Makefile + +install: all + install -d $(PREFIX)/man/man1 + install -m 644 ragel.1 $(PREFIX)/man/man1/ragel.1 + install -m 644 rlcodegen.1 $(PREFIX)/man/man1/rlcodegen.1 + install -d $(PREFIX)/share/doc/ragel + install -m 644 ragel-guide.pdf $(PREFIX)/share/doc/ragel/ragel-guide.pdf + gzip -c ../ChangeLog > ChangeLog.gz + install -m 644 ChangeLog.gz $(PREFIX)/share/doc/ragel/ChangeLog.gz + rm ChangeLog.gz diff --git a/doc/RELEASE_NOTES_V2 b/doc/RELEASE_NOTES_V2 new file mode 100644 index 0000000..1d03eda --- /dev/null +++ b/doc/RELEASE_NOTES_V2 @@ -0,0 +1,86 @@ + Porting Ragel Programs to Version 2 + =================================== + + +1. Move all ?, +, and * operators to the right hand side of the operand. 
+ + float = *digit ?('.' +digit); + + float = digit* ('.' digit+)?; + +2. Change all assignments to main from a definition using the = operator to an +instantiation using the := operator. + + main = 'hello'; + + main := 'hello'; + +3. Remove $0 %! operations for clearing priorities. + +4. Anywhere implicit default priorities of zero are used to interact with +explicitly set non-zero transitions, set the priorities to zero explicitly. + + main := any* 'FIN' :1; + + main := ( any $0 )* 'FIN' :1; + +5. If priorities need to interact across different machines, use a common name. +Note that priority names default to the name of the machine they are assigned +to. + + wild = any*; + main := wild 'FIN' :1; + + wild = ( any $0 )*; + main := wild 'FIN' :wild,1; + +6. If using clear keyword or operators modified with ^, duplicate the operand +machines and rewrite them such that the cleared actions and suppressed out +transitions and out priorities are removed. + +7. Change func keyword to action. + +8. Escape any - symbols and initial ^ symbol in or literals ([] outside of +regular expressions). + + main := [^#$-+*]; + + main := [\^#$\-+*]; + +9. In C output, lowercase init, execute and finish routines and put an +underscore in between the fsm name and the function name. Also qualify +references to the fsm structure with the struct keyword. + + fsm f; + fsmInit( &f ); + fsmExecute( &f, buf, len ); + fsmFinish( &f ); + + struct fsm f; + fsm_init( &f ); + fsm_execute( &f, buf, len ); + fsm_finish( &f ); + +10. In C++ output, lowercase the init, execute and finish routines. Also make +sure that the init routine is explicitly called. + + fsm f; + f.Init(); + f.Execute( buf, len ); + f.Finish(); + + fsm f; + f.init(); + f.execute( buf, len ); + f.finish(); + +11. Remove calls to the accept routine, instead examine the return value of the +finish routine. If the machine does not accept then finish returns -1 or 0, if +the machine accepts then finish returns 1. 
+ + f.finish(); + if ( f.accept() ) + cout << "ACCEPT" << endl; + + if ( f.finish() > 0 ) + cout << "ACCEPT" << endl; diff --git a/doc/RELEASE_NOTES_V3 b/doc/RELEASE_NOTES_V3 new file mode 100644 index 0000000..64dd2f1 --- /dev/null +++ b/doc/RELEASE_NOTES_V3 @@ -0,0 +1,8 @@ + Porting Ragel Version 2 Programs to Version 3 + ============================================= + +1. Replace all instances of *p in action code with the keyword fc. + +2. Replace all instances of : used to set actions or priorities with @. + +3. Wrap named priorities in parentheses so they are of the form @(name,1). diff --git a/doc/RELEASE_NOTES_V4 b/doc/RELEASE_NOTES_V4 new file mode 100644 index 0000000..a142f36 --- /dev/null +++ b/doc/RELEASE_NOTES_V4 @@ -0,0 +1,361 @@ + + RELEASE NOTES Ragel 4.X + + +To-State and From-State Action Embedding Operators Added (4.2) +============================================================== + +Added operators for embedding actions into all transitions into a state and all +transitions out of a state. These embeddings stay with the state, and are +irrespective of what the current transitions are and any future transitions +that may be added into or out of the state. + +In the following example act is executed on the transitions for 't' and 'y', +even though it is only embedded in the context of the first alternative. This +is because after matching 'hi ', the machine has not yet distinguished between +the two threads. The machine is simultaneously in the state expecting 'there' +and the state expecting 'you'. 
+ + action act {} + main := + 'hi ' %*act 'there' | + 'hi you'; + +The to-state action embedding operators embed into transitions that go into: +>~ the start state +$~ all states +%~ final states +<~ states that are not the start +@~ states that are not final +<@~ states that are not the start AND not final + +The from-state action embedding operators embed into transitions that leave: +>* the start state +$* all states +%* final states +<* states that are not the start +@* states that are not final +<@* states that are not the start AND not final + +Changed Operators for Embedding Context/Actions Into States (4.2) +================================================================= + +The operators used to embed context and actions into states have been modified. +The purpose of the modification is to make it easier to distribute actions to +take among the states in a chain of concatenations such that each state has +only a single action embedded. An example follows below. + +Now Gone: + +1. The use of >@ for selecting the states to modify (as in >@/ to embed eof + actions, etc) has been removed. This prefix meant start state OR not start AND + not final. + +2. The use of @% for selecting states to modify (as in @%/ to embed eof + actions, etc) has been removed. This prefix previously meant not start AND not + final OR final. + +Now Added: + +1. The prefix < which means not start. +2. The prefix @ which means not final. +3. The prefix <@ which means not start & not final. + +The new matrix of operators used to embed into states is: + +>: $: %: <: @: <@: - context +>~ $~ %~ <~ @~ <@~ - to state action +>* $* %* <* @* <@* - from state action +>/ $/ %/ </ @/ <@/ - eof action +>! $! %! <! @! <@!
- error action +>^ $^ %^ <^ @^ <@^ - local error action + +| | | | | | +| | | | | *- not start & not final +| | | | | +| | | | *- not final +| | | | +| | | *- not start +| | | +| | *- final +| | +| *- all states +| +*- start state + +This example shows one way to use the new operators to cover all the states +with a single action. The embedding of eof2 covers all the states in m2. The +embeddings of eof1 and eof3 avoid the boundaries that m1 and m3 both share with +m2. + + action eof1 {} + action eof2 {} + action eof3 {} + m1 = 'm1'; + m2 = ' '+; + m3 = 'm3'; + + main := m1 @/eof1 . m2 $/eof2 . m3 </eof3; + +Verbose Action, Priority and Context Embedding Added (4.2) +========================================================== + +As an alternative to the symbol-based action, priority and context embedding +operators, a more verbose form of embedding has been added. The general form of +the verbose embedding is: + + machine <- location [modifier] embedding_type value + +For embeddings into transitions, the possible locations are: + enter -- entering transitions + all -- all transitions + finish -- transitions into a final state + leave -- pending transitions out of the final states + +For embeddings into states, the possible locations are: + start -- the start state + all -- all states + final -- final states + !start -- all states except the start + !final -- states that are not final + !start !final -- states that are not the start and not final + +The embedding types are: + exec -- an action into transitions + pri -- a priority into transitions + ctx -- a named context into a state + into -- an action into all transitions into a state + from -- an action into all transitions out of a state + err -- an error action into a state + lerr -- a local error action into a state + +The possible modifiers: + on name -- specify a name for priority and local error embedding + +Character-Level Negation '^' Added (4.1) +======================================== + +A
character-level negation operator ^ was added. This operator has the same +precedence level as !. It is used to match single characters that are not +matched by the machine it operates on. The expression ^m is equivalent to +(any-(m)). This machine makes sense only when applied to machines that match +single characters. Since subtraction is essentially a set difference, any +strings matched by m that are not of length 1 will be ignored by the +subtraction and have no effect. + +Discontinued Plus Sign To Specify Positive Literal Numbers (4.1) +================================================================ + +The use of + to specify a literal number as positive has been removed. This +notation is redundant because all literals are positive by default. It was +unlikely to be used but was provided for consistency. This notation caused an +ambiguity with the '+' repetition operator. Due to this ambiguity, and the fact +that it is unlikely to be used and is completely unnecessary when it is, it has +been removed. This simplifies the design. It eliminates possible confusion and +removes the need to explain why the ambiguity exists and how it is resolved. + +As a consequence of the removal, any expression (m +1) or (m+1) will now be +parsed as (m+ . 1) rather than (m . +1). This is because previously the scanner +handled positive literals and therefore they got precedence over the repetition +operator. + +Precedence of Subtraction Operator vs Negative Literals Changed (4.1) +===================================================================== + +Previously, the scanner located negative numbers and therefore gave a higher +priority to the use of - to specify a negative literal number. This has +changed, precedence is now given to the subtraction operator. + +This change is for two reasons: A) The subtraction operator is far more common +than negative literal numbers. I have quite often been fooled by writing +(any-0) and having it parsed as ( any .
-0 ) rather than ( any - 0 ) as I +wanted. B) In the definition of concatenation I want to maintain that +concatenation is used only when there are no other binary operators separating +two machines. In the case of (any-0) there is an operator separating the +machines and parsing this as the concatenation of (any . -0) violates this +rule. + +Duplicate Actions are Removed From Action Lists (4.1) +===================================================== + +With previous versions of Ragel, effort was often expended towards ensuring +identical machines were not unioned together, causing duplicate actions to +appear in the same action list (transition or eof perhaps). Often this required +factoring out a machine or specializing a machine's purpose. For example, +consider the following machine: + + word = [a-z]+ >s $a %l; + main := + ( word ' ' word ) | + ( word '\t' word ); + +This machine needed to be rewritten as the following to avoid duplicate +actions. This is essentially a refactoring of the machine. + + main := word ( ' ' | '\t' ) word; + +An alternative was to specialize the machines: + + word1 = [a-z]+ >s $a %l; + word2 = [a-z]+; + main := + ( word1 ' ' word1 ) | + ( word2 '\t' word1 ); + +Since duplicating an action on a transition is never (in my experience) desired +and must be manually avoided, sometimes to the point of obscuring the machine +specification, it is now done automatically by Ragel. This change should have +no effect on existing code that is properly written and will allow the +programmer more freedom when writing new code. + +New Frontend (4.0) +================== + +The syntax for embedding Ragel statements into the host language has changed. +The primary motivation is a better interaction with Objective-C. Under the +previous scheme Ragel generated the opening and closing of the structure and +the interface.
The user could inject user defined declarations into the struct +using the struct {}; statement, however there was no way to inject interface +declarations. Under this scheme it was also awkward to give the machine a base +class. Rather than add another statement similar to struct for including +declarations in the interface we take the reverse approach, the user now writes +the struct and interface and Ragel statements are injected as needed. + +Machine specifications now begin with %% and are followed with an optional name +and either a single ragel statement or a sequence of statements enclosed in {}. +If a machine specification does not have a name then Ragel tries to find a name +for it by first checking if the specification is inside a struct or class or +interface. If it is not then it uses the name of the previous machine +specification. If still no name is found then an error is raised. + +Since the user now specifies the fsm struct directly and since the current +state and stack variables are now of type integer in all code styles, it is +more appropriate for the user to manage the declarations of these variables. +Ragel no longer generates the current state and the stack data variables. This +also gives the user more freedom in deciding how the stack is to be allocated, +and also permits it to be grown as necessary, rather than allowing only a fixed +stack size. + +FSM specifications now persist in memory, so the second time a specification of +any particular name is seen the statements will be added to the previous +specification. Due to this it is no longer necessary to give the element or +alphabet type in the header portion and in the code portion. In addition there +is now an include statement that allows the inclusion of the header portion of +a machine if it resides in a different file, as well as allowing the inclusion +of a machine spec of a different name from any file at all.
+ +Ragel is still able to generate the machine's function declarations. This may +not be required for C code, however this will be necessary for C++ and +Objective-C code. This is now accomplished with the interface statement. + +Ragel now has different criteria for deciding what to generate. If the spec +contains the interface statement then the machine's interface is generated. If +the spec contains the definition of a main machine, then the code is generated. +It is now possible to put common machine definitions into a separate library +file and to include them in other machine specifications. + +To port Ragel 3.x programs to 4.x, the FSM's structure must be explicitly coded +in the host language and it must include the declaration of current state. This +should be called 'curs' and be of type int. If the machine uses the fcall +and fret directives, the structure must also include the stack variables. The +stack should be named 'stack' and be of type int*. The stack top should be +named 'top' and be of type int. + +In Objective-C, both the interface and implementation directives must also +be explicitly coded by the user. Examples can be found in the section "New +Interface Examples". + +Action and Priority Embedding Operators (4.0) +============================================= + +In the interest of simplifying the language, operators now embed strictly +either on characters or on EOF, but never both. Operators should be doing one +well-defined thing, rather than have multiple effects. This also enables the +detection of FSM commands that do not make sense in EOF actions. + +This change is summarized by: + -'%' operator embeds only into leaving characters. + -All global and local error operators only embed on error character + transitions, their action will not be triggered on EOF in non-final states. + -Addition of EOF action embedding operators for all classes of states to make + up for functionality removed from other operators. These are >/ $/ @/ %/.
+ -Start transition operator '>' does not imply leaving transitions when start + state is final. + +This change results in a simpler and more direct relationship between the +operators and the physical state machine entities they operate on. It removes +the special cases within the operators that require you to stop and think as +you program in Ragel. + +Previously, the pending out transition operator % simultaneously served two +purposes. First, to embed actions that are to get transferred to transitions +made going out of the machine. These transitions are created by the +concatenation and kleene star operators. Second, to specify actions that get +executed on EOF should the final state in the machine to which the operator is +applied remain final. + +To convert Ragel 3.x programs: Any place where there is an embedding of an +action into pending out transitions using the % operator and the final states +remain final in the end result machine, add an embedding of the same action +using the EOF operator %/action. + +Also note that when generating dot file output of a specific component of a +machine that has leaving transitions embedded in the final states, these +transitions will no longer show up since the leaving transition operator no longer +causes actions to be moved into the EOF event when the state they are +embedded into becomes a final state of the final machine. + +Const Element Type (4.0) +======================== + +If the element type has not been defined, the previous behaviour was to default +to the alphabet type. The element type however is usually not specified as +const and in most cases the data pointer in the machine's execute function +should be a const pointer. Therefore ragel now makes the element type default +to a constant version of the alphabet type. This can always be changed by using +the element statement. For example 'element char;' will result in a non-const +data pointer.
+ +New Interface Examples (4.0) +============================ + +---------- C ---------- + +struct fsm +{ + int curs; +}; + +%% fsm +{ + main := 'hello world'; +} + +--------- C++ --------- + +struct fsm +{ + int curs; + %% interface; +}; + +%% main := 'hello world'; + +----- Objective-C ----- + +@interface Clang : Object +{ +@public + int curs; +}; + +%% interface; + +@end + +@implementation Clang + +%% main := 'hello world'; + +@end + diff --git a/doc/RELEASE_NOTES_V5 b/doc/RELEASE_NOTES_V5 new file mode 100644 index 0000000..15147d8 --- /dev/null +++ b/doc/RELEASE_NOTES_V5 @@ -0,0 +1,112 @@ + + RELEASE NOTES Ragel 5.X + +This file describes the changes in Ragel version 5.X that are not backwards +compatible. For a list of all the changes see the ChangeLog file. + + +Interface to Host Programming Language +====================================== + +In version 5.0 there is a new interface to the host programming language. +There are two major changes: the way Ragel specifications are embedded in the +host program text, and the way that the host program interfaces with the +generated code. + +Multiline Ragel specifications begin with '%%{' and end with '}%%'. Single line +specifications start with '%%' and end at the first newline. Machine names are +given with the machine statement at the very beginning of a machine spec. This +change was made in order to make the task of separating Ragel code from the +host code as straightforward as possible. This will ease the addition of more +supported host languages. + +Ragel no longer parses structure and class names in order to infer machine +names. Parsing structures and classes requires knowledge of the host language +hardcoded into Ragel. Since Ragel is moving towards language independence, this +feature has been removed. + +If a machine spec does not have a name then the previous spec name is used. If +there is no previous specification then this is an error.
+ +The second major frontend change in 5.0 is doing away with the init(), +execute() and finish() routines. Instead of generating these functions Ragel +now only generates their contents. This scheme is more flexible, allowing the +user to use a single function to drive the machine or separate out the +different tasks if desired. It also frees the user from having to build the +machine around a structure or a class. + +An example machine is: + +-------------------------- + +%%{ + machine fsm; + main := 'hello world'; +}%% + +%% write data; + +int parse( char *p ) +{ + int cs; + char *pe = p + strlen(p); + %%{ + write init; + write exec; + }%% + return cs; +}; + +-------------------------- + +The generated code expects certain variables to be available. In some cases +only if the corresponding features are used. + + el* p: A pointer to the data to parse. + el* pe: A pointer to one past the last item. + int cs: The current state. + el* tokstart: The beginning of current match of longest match machines. + el* tokend: The end of the current match. + int act: The longest match pattern that has been matched. + int stack[n]: The stack for machine call statements + int top: The top of the stack for machine call statements + +It is possible to specify to Ragel how the generated code should access all the +variables except p and pe by using the access statement. + + access some_pointer->; + access variable_name_prefix; + +The writing statements are: + + write data; + write init; + write exec; + write eof; + +There are some options available: + + write data noerror nofinal noprefix; + write exec noend + + noerror: Do not write the id of the error state. + nofinal: Do not write the id of the first_final state. + noprefix: Do not prefix the variable with the name of the machine + noend: Do not test if the current character has reached pe. This is + useful if one wishes to break out of the machine using fbreak + when hitting some marker, such as the null character.
+ +The fexec Action Statement Changed +================================== + +The fexec action statement has been changed to take only the new position to +move to. This statement is more useful for moving backwards and reparsing input +than for specifying a whole new buffer entirely and has been shifted to this +new use. Also, using only a single argument simplifies the parsing of Ragel +input files and will ease the addition of other host languages. + +Context Embedding Has Been Dropped +================================== + +The context embedding operators were not carried over from version 4.X. Though +interesting, they have not found any real practical use. diff --git a/doc/bmconcat.fig b/doc/bmconcat.fig new file mode 100644 index 0000000..a47f13b --- /dev/null +++ b/doc/bmconcat.fig @@ -0,0 +1,40 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1440 450 135 135 1440 450 1575 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2115 450 135 135 2115 450 2250 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 765 450 135 135 765 450 900 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2790 450 135 135 2790 450 2925 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3465 450 135 135 3465 450 3600 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 4140 450 135 135 4140 450 4275 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 4140 450 90 90 4140 450 4230 450 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 900 450 1305 450 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1575 450 1980 450 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 2250 450 2655 450 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 2925 450 3330 450 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 3600 450 4005 450 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 225 495 315 360 405 630 495 450 540 450 630 450 + 0.000 1.000 1.000 1.000 1.000 0.000 +4 0 0 50 0 0 10 0.0000 4 105 75 1035 405 h\001 +4 0 0 50 0 0 
10 0.0000 4 75 60 1710 405 e\001 +4 0 0 50 0 0 10 0.0000 4 105 60 2385 405 l\001 +4 0 0 50 0 0 10 0.0000 4 105 60 3060 405 l\001 +4 0 0 50 0 0 10 0.0000 4 75 75 3735 405 o\001 diff --git a/doc/bmnull.fig b/doc/bmnull.fig new file mode 100644 index 0000000..1b85885 --- /dev/null +++ b/doc/bmnull.fig @@ -0,0 +1,15 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 765 450 135 135 765 450 900 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 765 450 90 90 765 450 855 450 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 225 495 315 360 405 630 495 450 540 450 630 450 + 0.000 1.000 1.000 1.000 1.000 0.000 diff --git a/doc/bmnum.fig b/doc/bmnum.fig new file mode 100644 index 0000000..5160114 --- /dev/null +++ b/doc/bmnum.fig @@ -0,0 +1,20 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 765 450 135 135 765 450 900 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1665 450 135 135 1665 450 1800 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1665 450 90 90 1665 450 1755 450 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 900 450 1530 450 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 225 495 315 360 405 630 495 450 540 450 630 450 + 0.000 1.000 1.000 1.000 1.000 0.000 +4 0 0 50 0 0 10 0.0000 4 75 270 1035 405 num\001 diff --git a/doc/bmor.fig b/doc/bmor.fig new file mode 100644 index 0000000..69c6da0 --- /dev/null +++ b/doc/bmor.fig @@ -0,0 +1,28 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +5 1 0 2 0 7 50 0 -1 0.000 0 1 1 0 1327.500 103.500 810 585 1305 810 1845 585 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 1 1 0 1327.500 -472.500 900 495 1305 585 1755 495 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1327.500 796.500 810 315 1305 90 1845 315 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1327.500 1372.500 900 405 1305 315 1755 405 + 1 1 2.00 60.00 60.00 +1 3 0 2 0 7 50 0 -1 0.000 1 
0.0000 765 450 135 135 765 450 900 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1890 450 90 90 1890 450 1980 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1890 450 135 135 1890 450 2025 450 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 225 495 315 360 405 630 495 450 540 450 630 450 + 0.000 1.000 1.000 1.000 1.000 0.000 +4 0 0 50 0 0 10 0.0000 4 105 75 1305 45 h\001 +4 0 0 50 0 0 10 0.0000 4 75 60 1305 270 e\001 +4 0 0 50 0 0 10 0.0000 4 105 60 1305 540 l\001 +4 0 0 50 0 0 10 0.0000 4 75 75 1305 765 o\001 diff --git a/doc/bmrange.fig b/doc/bmrange.fig new file mode 100644 index 0000000..7ad3693 --- /dev/null +++ b/doc/bmrange.fig @@ -0,0 +1,20 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 765 450 135 135 765 450 900 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1710 450 135 135 1710 450 1845 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1710 450 90 90 1710 450 1800 450 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 900 450 1575 450 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 225 495 315 360 405 630 495 450 540 450 630 450 + 0.000 1.000 1.000 1.000 1.000 0.000 +4 0 0 50 0 0 10 0.0000 4 105 285 1080 405 l .. 
u\001 diff --git a/doc/bmregex.fig b/doc/bmregex.fig new file mode 100644 index 0000000..5823524 --- /dev/null +++ b/doc/bmregex.fig @@ -0,0 +1,42 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 3420.000 240.000 3330 360 3420 90 3510 360 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1440.000 240.000 1350 360 1440 90 1530 360 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 2340.000 240.000 2250 360 2340 90 2430 360 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 2880.000 266.250 3375 585 2880 855 2385 585 + 1 1 2.00 60.00 60.00 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 765 450 135 135 765 450 900 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1440 450 135 135 1440 450 1575 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2340 450 135 135 2340 450 2475 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3420 450 135 135 3420 450 3555 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3420 450 90 90 3420 450 3510 450 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 900 450 1305 450 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1575 450 2205 450 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 2475 450 3285 450 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 225 495 315 360 405 630 495 450 540 450 630 450 + 0.000 1.000 1.000 1.000 1.000 0.000 +4 0 0 50 0 0 10 0.0000 4 75 60 1035 405 a\001 +4 0 0 50 0 0 10 0.0000 4 105 75 1395 45 b\001 +4 0 0 50 0 12 10 0.0000 4 105 180 2250 45 df\001 +4 0 0 50 0 0 10 0.0000 4 135 315 2700 405 1,2,3\001 +4 0 0 50 0 0 10 0.0000 4 75 195 1800 405 c-z\001 +4 0 0 50 0 0 10 0.0000 4 135 315 3285 45 1,2,3\001 +4 0 0 50 0 12 10 0.0000 4 105 180 2790 810 df\001 diff --git a/doc/docbook.dsl b/doc/docbook.dsl new file mode 100644 index 0000000..e8fabe0 --- /dev/null +++ b/doc/docbook.dsl @@ -0,0 +1,49 @@ +<!DOCTYPE style-sheet PUBLIC "-//James Clark//DTD DSSSL Style Sheet//EN" [ +<!ENTITY docbook.dsl PUBLIC + "-//Norman Walsh//DOCUMENT 
DocBook Print Stylesheet//EN" CDATA dsssl> +]> + +<style-sheet> +<style-specification use="docbook"> +<style-specification-body> + +;; your stuff goes here... + +(define %generate-article-titlepage% #t) +(define %generate-article-toc% #t) +(define %generate-article-titlepage-on-separate-page% #t) +(define %generate-article-toc-on-titlepage% #f) +(define %article-page-number-restart% #t) + +(define %chapter-autolabel% #t) +(define %section-autolabel% #t) +(define (toc-depth nd) 3) + +; === Media objects === +(define preferred-mediaobject-extensions ;; this magic allows to use different graphical + (list "eps")) ;; formats for printing and putting online +(define acceptable-mediaobject-extensions + '()) +(define preferred-mediaobject-notations + (list "EPS")) +(define acceptable-mediaobject-notations + (list "linespecific")) + +; === Rendering === +(define %head-after-factor% 0.2) ;; not much whitespace after orderedlist head +(define ($paragraph$) ;; more whitespace after paragraph than before + (make paragraph + first-line-start-indent: (if (is-first-para) + %para-indent-firstpara% + %para-indent%) + space-before: (* %para-sep% 4) + space-after: (/ %para-sep% 4) + quadding: %default-quadding% + hyphenate?: %hyphenation% + language: (dsssl-language-code) + (process-children))) + +</style-specification-body> +</style-specification> +<external-specification id="docbook" document="docbook.dsl"> +</style-sheet> diff --git a/doc/exaction.fig b/doc/exaction.fig new file mode 100644 index 0000000..e41ef2e --- /dev/null +++ b/doc/exaction.fig @@ -0,0 +1,37 @@ +#FIG 3.2 Produced by xfig version 3.2.5-alpha5 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1620.000 400.500 1530 495 1620 270 1710 495 + 1 1 2.00 60.00 60.00 +6 1377 810 1872 990 +4 0 0 50 0 0 10 0.0000 4 120 315 1557 945 /C,N\001 +4 0 0 50 0 12 10 0.0000 4 105 180 1377 945 nl\001 +-6 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 585 135 135 585 585 720 585 +1 3 0 2 0 7 50 0 
-1 0.000 1 0.0000 1620 585 135 135 1620 585 1755 585 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2655 585 135 135 2655 585 2790 585 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2655 585 90 90 2655 585 2745 585 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 720 585 1485 585 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1755 585 2520 585 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 45 630 135 495 225 765 315 585 360 585 450 585 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 630 720 765 900 1305 1035 1935 1035 2475 900 2610 720 + 0.000 1.000 1.000 1.000 1.000 0.000 +4 0 0 50 0 0 10 0.0000 4 120 495 855 540 a-z/A,B\001 +4 0 0 50 0 0 10 0.0000 4 105 330 1485 225 a-z/B\001 +4 0 0 50 0 12 10 0.0000 4 105 180 1935 540 nl\001 +4 0 0 50 0 0 10 0.0000 4 120 315 2115 540 /C,N\001 diff --git a/doc/exallact.fig b/doc/exallact.fig new file mode 100644 index 0000000..40f4fcb --- /dev/null +++ b/doc/exallact.fig @@ -0,0 +1,25 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 630 495 135 135 630 495 765 495 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1530 495 135 135 1530 495 1665 495 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2430 495 135 135 2430 495 2565 495 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2430 495 90 90 2430 495 2520 495 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 765 495 1395 495 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1665 495 2295 495 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 90 540 180 405 270 675 360 495 405 495 495 495 + 0.000 1.000 1.000 1.000 1.000 0.000 +4 0 0 50 0 0 10 0.0000 4 105 285 945 450 m/A\001 +4 0 0 50 0 0 10 0.0000 4 135 360 1800 450 1,2/A\001 diff --git a/doc/exallpri.fig b/doc/exallpri.fig new file mode 100644 index 0000000..1b3a7ad --- /dev/null +++ b/doc/exallpri.fig @@ -0,0 +1,33 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +5 1 0 2 0 
7 50 0 -1 0.000 0 0 1 0 630.000 825.000 540 945 630 675 720 945 + 1 1 2.00 60.00 60.00 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 630 1035 135 135 630 1035 765 1035 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1305 1035 135 135 1305 1035 1440 1035 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1980 1035 135 135 1980 1035 2115 1035 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2655 1035 135 135 2655 1035 2790 1035 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2655 1035 90 90 2655 1035 2745 1035 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 765 1035 1170 1035 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1440 1035 1845 1035 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 2115 1035 2520 1035 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 90 1080 180 945 270 1215 360 1035 405 1035 495 1035 + 0.000 1.000 1.000 1.000 1.000 0.000 +4 0 0 50 0 12 10 0.0000 4 105 180 540 630 df\001 +4 0 0 50 0 0 10 0.0000 4 105 90 900 990 F\001 +4 0 0 50 0 0 10 0.0000 4 105 60 1575 990 I\001 +4 0 0 50 0 0 10 0.0000 4 105 120 2250 990 N\001 diff --git a/doc/exconcat.fig b/doc/exconcat.fig new file mode 100644 index 0000000..21bf76f --- /dev/null +++ b/doc/exconcat.fig @@ -0,0 +1,93 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 1080 135 135 1845 1080 1980 1080 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3105 1080 135 135 3105 1080 3240 1080 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3105 1080 90 90 3105 1080 3195 1080 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 1845 135 135 1845 1845 1980 1845 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1125 1575 135 135 1125 1575 1260 1575 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 1080 135 135 585 1080 720 1080 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2565 1575 135 135 2565 1575 2700 1575 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 720 1080 1710 1080 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1215 1485 1755 1170 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 
60.00 60.00 + 675 1170 1035 1485 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1260 1620 1710 1800 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1845 1710 1845 1215 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 2655 1485 3015 1170 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 2970 1080 1980 1080 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1980 1800 2430 1620 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 2475 1485 1935 1170 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 5 + 1 1 2.00 60.00 60.00 + 1755 1935 1485 2115 900 2070 405 1530 495 1170 + 0.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 5 + 1 1 2.00 60.00 60.00 + 1035 1665 945 1755 765 1755 540 1530 585 1215 + 0.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 3 + 1 1 2.00 60.00 60.00 + 1755 990 1215 675 675 990 + 0.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 450 1035 225 810 180 675 225 630 315 675 540 945 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 1800 945 1800 765 1800 675 1890 675 1890 810 1890 945 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 5 + 1 1 2.00 60.00 60.00 + 3105 945 3105 405 900 405 675 765 630 945 + 0.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 45 1125 135 990 225 1260 315 1080 360 1080 450 1080 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 9 + 1 1 2.00 60.00 60.00 + 3105 1215 3105 1350 3060 1620 2880 1845 2565 2070 2115 2160 + 1710 2115 1350 1980 1170 1710 + 0.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 + 0.000 +4 0 0 50 0 12 10 0.0000 4 105 180 675 1575 nl\001 +4 0 0 50 0 0 10 0.0000 4 105 90 855 1260 E\001 +4 0 0 50 0 12 10 0.0000 4 105 180 1125 720 nl\001 +4 0 0 50 0 12 10 0.0000 4 105 180 1125 1035 df\001 +4 0 0 50 0 12 10 0.0000 4 105 180 180 585 nl\001 +4 0 0 50 0 12 10 0.0000 4 
105 180 1755 630 df\001 +4 0 0 50 0 12 10 0.0000 4 105 180 2475 1035 df\001 +4 0 0 50 0 12 10 0.0000 4 105 180 990 1980 nl\001 +4 0 0 50 0 12 10 0.0000 4 105 180 1755 360 nl\001 +4 0 0 50 0 12 10 0.0000 4 105 180 2205 1305 df\001 +4 0 0 50 0 12 10 0.0000 4 105 180 2655 1305 nl\001 +4 0 0 50 0 12 10 0.0000 4 105 180 1305 1305 df\001 +4 0 0 50 0 0 10 0.0000 4 105 105 1485 1665 O\001 +4 0 0 50 0 0 10 0.0000 4 105 90 2115 1665 F\001 +4 0 0 50 0 12 10 0.0000 4 105 180 1620 1485 df\001 +4 0 0 50 0 0 10 0.0000 4 105 90 2295 2025 E\001 diff --git a/doc/exdoneact.fig b/doc/exdoneact.fig new file mode 100644 index 0000000..a9904af --- /dev/null +++ b/doc/exdoneact.fig @@ -0,0 +1,24 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 630.000 310.500 540 405 630 180 720 405 + 1 1 2.00 60.00 60.00 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 630 495 135 135 630 495 765 495 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1530 495 90 90 1530 495 1620 495 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1530 495 135 135 1530 495 1665 495 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 765 495 1395 495 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 90 540 180 405 270 675 360 495 405 495 495 495 + 0.000 1.000 1.000 1.000 1.000 0.000 +4 0 0 50 0 0 10 0.0000 4 75 195 540 135 a-z\001 +4 0 0 50 0 12 10 0.0000 4 105 180 900 450 sp\001 +4 0 0 50 0 0 10 0.0000 4 105 165 1080 450 /A\001 diff --git a/doc/exdonepri.fig b/doc/exdonepri.fig new file mode 100644 index 0000000..a76a485 --- /dev/null +++ b/doc/exdonepri.fig @@ -0,0 +1,55 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 630 1035 135 135 630 1035 765 1035 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1305 1035 135 135 1305 1035 1440 1035 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1980 1035 135 135 1980 1035 2115 1035 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2655 1035 135 135 2655 1035 2790 1035 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2655 1035 90 90 
2655 1035 2745 1035 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 765 1035 1170 1035 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1440 1035 1845 1035 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 2115 1035 2520 1035 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 90 1080 180 945 270 1215 360 1035 405 1035 495 1035 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 4 + 1 1 2.00 60.00 60.00 + 1215 1125 1080 1305 855 1305 720 1125 + 0.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 1890 1125 1755 1350 1305 1485 810 1485 675 1350 630 1170 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 585 900 585 765 585 630 675 630 675 765 675 900 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 1260 900 1260 765 1260 630 1350 630 1350 765 1350 900 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 4 + 1 1 2.00 60.00 60.00 + 1890 945 1755 765 1530 765 1395 945 + 0.000 1.000 1.000 0.000 +4 0 0 50 0 0 10 0.0000 4 105 90 900 990 F\001 +4 0 0 50 0 0 10 0.0000 4 105 120 2250 990 N\001 +4 0 0 50 0 12 10 0.0000 4 105 180 855 1215 df\001 +4 0 0 50 0 12 10 0.0000 4 105 180 1215 1395 df\001 +4 0 0 50 0 12 10 0.0000 4 105 180 540 585 df\001 +4 0 0 50 0 0 10 0.0000 4 105 90 1260 585 F\001 +4 0 0 50 0 0 10 0.0000 4 105 90 1620 720 F\001 +4 0 0 50 0 0 10 0.0000 4 105 60 1620 990 I\001 diff --git a/doc/exfinact.fig b/doc/exfinact.fig new file mode 100644 index 0000000..3cb98c9 --- /dev/null +++ b/doc/exfinact.fig @@ -0,0 +1,29 @@ +#FIG 3.2 Produced by xfig version 3.2.5-alpha5 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1665.000 400.500 1575 495 1665 270 1755 495 + 1 1 2.00 60.00 60.00 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2565 585 90 90 2565 585 2655 585 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2565 585 135 135 2565 
585 2700 585 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 765 585 135 135 765 585 900 585 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1665 585 135 135 1665 585 1800 585 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 900 585 1530 585 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1800 585 2430 585 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 225 630 315 495 405 765 495 585 540 585 630 585 + 0.000 1.000 1.000 1.000 1.000 0.000 +4 0 0 50 0 12 10 0.0000 4 105 180 1980 540 nl\001 +4 0 0 50 0 0 10 0.0000 4 105 165 2160 540 /A\001 +4 0 0 50 0 0 10 0.0000 4 75 195 1080 540 a-z\001 +4 0 0 50 0 0 10 0.0000 4 75 195 1575 225 a-z\001 diff --git a/doc/exfinpri.fig b/doc/exfinpri.fig new file mode 100644 index 0000000..947b29c --- /dev/null +++ b/doc/exfinpri.fig @@ -0,0 +1,55 @@ +#FIG 3.2 Produced by xfig version 3.2.5-alpha5 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1665.000 378.000 1530 450 1665 225 1800 450 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 1 1 0 1174.891 998.804 1485 540 945 495 630 900 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1237.500 992.500 1485 1575 990 1575 630 1170 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1665.000 1323.000 1530 1395 1665 1170 1800 1395 + 1 1 2.00 60.00 60.00 +6 720 225 1125 540 +4 0 0 50 0 0 10 0.3840 4 105 165 931 418 /A\001 +4 0 0 50 0 12 10 0.3840 4 105 180 763 485 sp\001 +-6 +6 855 1350 1215 1575 +4 0 0 50 0 12 10 5.8294 4 105 180 871 1429 sp\001 +4 0 0 50 0 0 10 5.8294 4 105 135 1033 1508 /B\001 +-6 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1665 585 135 135 1665 585 1800 585 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1665 585 180 180 1665 585 1845 585 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1665 1530 180 180 1665 1530 1845 1530 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1665 1530 135 135 1665 1530 1800 1530 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 630 1035 135 135 630 1035 765 1035 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 630 1035 90 90 630 
1035 720 1035 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 720 945 1485 630 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 717 1118 1485 1485 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 90 1080 180 945 270 1215 360 1035 405 1035 495 1035 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 495 990 360 855 270 765 360 675 450 765 585 900 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 2 0 2 0 7 44 0 -1 0.000 0 1 0 4 + 1 1 2.00 60.00 60.00 + 1845 1530 2160 1305 2160 810 1845 585 + 0.000 -1.000 -1.000 0.000 +4 0 0 50 0 12 10 0.0000 4 105 180 270 630 sp\001 +4 0 0 50 0 0 10 5.8818 4 105 210 1035 1215 0-9\001 +4 0 0 50 0 0 10 0.3840 4 75 195 945 810 a-z\001 +4 0 0 50 0 0 10 0.0000 4 120 450 1440 180 a-z,0-9\001 +4 0 0 50 0 0 10 0.0000 4 105 210 1530 1125 0-9\001 +4 0 0 50 0 0 10 0.0000 4 105 330 2295 1035 a-z/B\001 diff --git a/doc/exinter.fig b/doc/exinter.fig new file mode 100644 index 0000000..51bc5df --- /dev/null +++ b/doc/exinter.fig @@ -0,0 +1,48 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1125.000 1777.500 765 360 1125 315 1485 360 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 1 1 0 1125.000 -877.500 765 540 1125 585 1485 540 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 1 1 0 2025.000 -877.500 1665 540 2025 585 2385 540 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 1 1 0 2925.000 -877.500 2565 540 2925 585 3285 540 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 1 1 0 3825.000 -877.500 3465 540 3825 585 4185 540 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 2025.000 1777.500 1665 360 2025 315 2385 360 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 2925.000 1777.500 2565 360 2925 315 3285 360 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 3825.000 1777.500 3465 360 3825 315 4185 360 + 1 1 2.00 60.00 60.00 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 675 450 135 
135 675 450 810 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1575 450 135 135 1575 450 1710 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2475 450 135 135 2475 450 2610 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3375 450 135 135 3375 450 3510 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 4275 450 135 135 4275 450 4410 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 4275 450 90 90 4275 450 4365 450 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 135 495 225 360 315 630 405 450 450 450 540 450 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 5 + 1 1 2.00 60.00 60.00 + 4275 585 4320 990 2475 1215 630 990 675 585 + 0.000 1.000 1.000 1.000 0.000 +4 0 0 50 0 12 10 0.0000 4 105 180 2385 1080 nl\001 +4 0 0 50 0 12 10 0.0000 4 105 180 1035 540 sp\001 +4 0 0 50 0 12 10 0.0000 4 105 180 1935 540 sp\001 +4 0 0 50 0 12 10 0.0000 4 105 180 2835 540 sp\001 +4 0 0 50 0 12 10 0.0000 4 105 180 3735 540 sp\001 +4 0 0 50 0 0 10 0.0000 4 75 195 3735 270 a-z\001 +4 0 0 50 0 0 10 0.0000 4 75 195 2835 270 a-z\001 +4 0 0 50 0 0 10 0.0000 4 75 195 1935 270 a-z\001 +4 0 0 50 0 0 10 0.0000 4 75 195 1035 270 a-z\001 diff --git a/doc/exnegate.fig b/doc/exnegate.fig new file mode 100644 index 0000000..ceb4a90 --- /dev/null +++ b/doc/exnegate.fig @@ -0,0 +1,31 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +6 1350 180 1710 765 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1530.000 375.000 1440 495 1530 225 1620 495 + 1 1 2.00 60.00 60.00 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1530 585 135 135 1530 585 1665 585 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1530 585 90 90 1530 585 1620 585 +-6 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 585 135 135 585 585 720 585 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 585 90 90 585 585 675 585 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 720 585 1395 585 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 585 450 900 135 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 45 630 135 495 225 765 315 585 360 585 450 585 + 
0.000 1.000 1.000 1.000 1.000 0.000 +4 0 0 50 0 12 10 0.0000 4 105 180 1440 180 df\001 +4 0 0 50 0 12 10 0.0000 4 105 180 900 540 df\001 +4 0 0 50 0 0 10 0.7854 4 105 210 585 360 0-9\001 +4 0 0 50 0 22 10 0.0000 4 105 165 945 135 Err\001 diff --git a/doc/exoption.fig b/doc/exoption.fig new file mode 100644 index 0000000..b59f46e --- /dev/null +++ b/doc/exoption.fig @@ -0,0 +1,37 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1395.000 330.000 1305 450 1395 180 1485 450 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 3015.000 330.000 2925 450 3015 180 3105 450 + 1 1 2.00 60.00 60.00 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 540 135 135 585 540 720 540 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1395 540 90 90 1395 540 1485 540 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1395 540 135 135 1395 540 1530 540 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2205 540 135 135 2205 540 2340 540 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3015 540 135 135 3015 540 3150 540 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3015 540 90 90 3015 540 3105 540 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 720 540 1260 540 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1530 540 2070 540 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 2340 540 2880 540 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 45 585 135 450 225 720 315 540 360 540 450 540 + 0.000 1.000 1.000 1.000 1.000 0.000 +4 0 0 50 0 0 10 0.0000 4 105 210 900 495 0-9\001 +4 0 0 50 0 0 10 0.0000 4 105 210 1305 135 0-9\001 +4 0 0 50 0 0 10 0.0000 4 15 45 1755 495 .\001 +4 0 0 50 0 0 10 0.0000 4 105 210 2520 495 0-9\001 +4 0 0 50 0 0 10 0.0000 4 105 210 2925 135 0-9\001 diff --git a/doc/exor.fig b/doc/exor.fig new file mode 100644 index 0000000..5d30b16 --- /dev/null +++ b/doc/exor.fig @@ -0,0 +1,65 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 720 990 135 135 720 990 855 990 +1 3 0 
2 0 7 50 0 -1 0.000 1 0.0000 1800 990 135 135 1800 990 1935 990 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1800 990 90 90 1800 990 1890 990 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1800 360 90 90 1800 360 1890 360 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1800 1620 90 90 1800 1620 1890 1620 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1800 360 135 135 1800 360 1935 360 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1800 1620 135 135 1800 1620 1935 1620 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2700 540 135 135 2700 540 2835 540 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3825 900 135 135 3825 900 3960 900 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3825 900 90 90 3825 900 3915 900 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 765 855 1665 360 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 855 990 1665 990 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 765 1125 1665 1620 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1935 360 2565 495 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 2835 585 3690 855 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1800 495 1800 855 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 180 1035 270 900 360 1170 450 990 495 990 585 990 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 1935 1665 2745 1665 2880 1665 2880 1575 2745 1575 1935 1575 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 1935 1035 2250 1035 2385 1035 2385 945 2250 945 1935 945 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 2 0 1 7 7 50 -1 -1 0.000 0 0 0 2 + 4455 540 4455 1035 + 0.000 0.000 +3 2 0 2 0 7 50 -1 -1 0.000 0 1 0 4 + 1 1 2.00 60.00 60.00 + 3690 945 3555 1305 4095 1305 3960 945 + 0.000 -1.000 -1.000 0.000 +4 0 0 50 0 0 10 0.0000 4 105 195 1530 675 0-9\001 +4 0 0 50 0 0 10 0.0000 4 105 195 1125 945 1-9\001 +4 0 0 50 0 0 10 5.7770 4 120 435 1035 1215 a-z,A-Z\001 +4 0 0 50 0 0 10 0.5061 4 105 75 1080 630 0\001 +4 0 0 
50 0 0 10 0.0000 4 105 195 2070 900 0-9\001 +4 0 0 50 0 0 10 0.0000 4 120 660 2070 1530 0-9,a-z,A-Z\001 +4 0 0 50 0 0 12 6.0214 4 75 90 2160 360 x\001 +4 0 0 50 0 0 10 5.9865 4 120 645 2925 540 0-9,a-f,A-F\001 +4 0 0 50 0 0 10 0.0000 4 120 645 3510 1575 0-9,a-f,A-F\001 diff --git a/doc/explus.fig b/doc/explus.fig new file mode 100644 index 0000000..cb42300 --- /dev/null +++ b/doc/explus.fig @@ -0,0 +1,23 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1845.000 375.000 1755 495 1845 225 1935 495 + 1 1 2.00 60.00 60.00 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 585 135 135 585 585 720 585 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 585 135 135 1845 585 1980 585 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 585 90 90 1845 585 1935 585 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 720 585 1710 585 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 45 630 135 495 225 765 315 585 360 585 450 585 + 0.000 1.000 1.000 1.000 1.000 0.000 +4 0 0 50 0 0 10 0.0000 4 135 765 810 540 0-9,a-z,A-Z\001 +4 0 0 50 0 0 10 0.0000 4 135 765 1485 180 0-9,a-z,A-Z\001 diff --git a/doc/exstact.fig b/doc/exstact.fig new file mode 100644 index 0000000..699324e --- /dev/null +++ b/doc/exstact.fig @@ -0,0 +1,33 @@ +#FIG 3.2 Produced by xfig version 3.2.5-alpha5 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1530.000 310.500 1440 405 1530 180 1620 405 + 1 1 2.00 60.00 60.00 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 630 495 135 135 630 495 765 495 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1530 495 135 135 1530 495 1665 495 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2430 495 90 90 2430 495 2520 495 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2430 495 135 135 2430 495 2565 495 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 765 495 1395 495 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1665 495 2295 495 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 90 540 
180 405 270 675 360 495 405 495 495 495 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 720 585 855 765 1215 900 1845 900 2205 765 2340 585 + 0.000 1.000 1.000 1.000 1.000 0.000 +4 0 0 50 0 0 10 0.0000 4 105 360 900 450 a-z/A\001 +4 0 0 50 0 12 10 0.0000 4 105 180 1890 450 sp\001 +4 0 0 50 0 12 10 0.0000 4 105 180 1440 810 sp\001 +4 0 0 50 0 0 10 0.0000 4 75 195 1427 127 a-z\001 diff --git a/doc/exstar.fig b/doc/exstar.fig new file mode 100644 index 0000000..cca7963 --- /dev/null +++ b/doc/exstar.fig @@ -0,0 +1,32 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +5 1 0 2 0 7 50 0 -1 0.000 0 1 0 1 1035.000 -742.500 675 675 1035 720 1395 675 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1035.000 1912.500 675 495 1035 450 1395 495 + 1 1 2.00 60.00 60.00 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 585 90 90 585 585 675 585 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 585 135 135 585 585 720 585 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1485 585 135 135 1485 585 1620 585 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 540 450 540 315 540 180 630 180 630 315 630 450 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 45 630 135 495 225 765 315 585 360 585 450 585 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 1440 450 1440 315 1440 180 1530 180 1530 315 1530 450 + 0.000 1.000 1.000 1.000 1.000 0.000 +4 0 0 50 0 0 10 0.0000 4 75 195 945 405 a-z\001 +4 0 0 50 0 12 10 0.0000 4 105 180 945 675 nl\001 +4 0 0 50 0 12 10 0.0000 4 105 180 495 135 nl\001 +4 0 0 50 0 0 10 0.0000 4 75 195 1395 135 a-z\001 diff --git a/doc/exstpri.fig b/doc/exstpri.fig new file mode 100644 index 0000000..1b3a7ad --- /dev/null +++ b/doc/exstpri.fig @@ -0,0 +1,33 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 630.000 825.000 540 945 630 675 720 945 + 1 1 2.00 60.00 
60.00 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 630 1035 135 135 630 1035 765 1035 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1305 1035 135 135 1305 1035 1440 1035 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1980 1035 135 135 1980 1035 2115 1035 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2655 1035 135 135 2655 1035 2790 1035 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2655 1035 90 90 2655 1035 2745 1035 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 765 1035 1170 1035 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1440 1035 1845 1035 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 2115 1035 2520 1035 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 90 1080 180 945 270 1215 360 1035 405 1035 495 1035 + 0.000 1.000 1.000 1.000 1.000 0.000 +4 0 0 50 0 12 10 0.0000 4 105 180 540 630 df\001 +4 0 0 50 0 0 10 0.0000 4 105 90 900 990 F\001 +4 0 0 50 0 0 10 0.0000 4 105 60 1575 990 I\001 +4 0 0 50 0 0 10 0.0000 4 105 120 2250 990 N\001 diff --git a/doc/exstrongsubtr.fig b/doc/exstrongsubtr.fig new file mode 100644 index 0000000..1aca526 --- /dev/null +++ b/doc/exstrongsubtr.fig @@ -0,0 +1,65 @@ +#FIG 3.2 Produced by xfig version 3.2.5-alpha5 +Portrait +Center +Metric +A4 +100.00 +Single +-2 +# Generated by dot version 2.2.1 (Fri Sep 30 13:22:44 UTC 2005) +# For: (age) Adrian Thurston,,, +# Title: foo +# Pages: 1 +1200 2 +0 32 #d2d2d2 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1470.000 376.000 1380 496 1470 226 1560 496 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 2306.000 376.000 2216 496 2306 226 2396 496 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 3130.000 364.000 3040 484 3130 214 3220 484 + 1 1 2.00 60.00 60.00 +5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 2721.519 538.911 3088 714 2714 945 2356 716 + 1 1 2.00 60.00 60.00 +# 0 +1 1 0 2 0 7 50 0 -1 0.000 0 0.0000 603 591 135 135 603 591 738 591 +# 1 +1 1 0 2 0 7 50 0 -1 0.000 0 0.0000 1474 596 135 135 1474 596 1609 596 +# 2 +1 1 0 2 0 7 50 0 -1 0.000 0 0.0000 2311 590 135 135 2311 590 
2446 590 +# 3 +1 1 0 2 0 7 50 0 -1 0.000 0 0.0000 3135 591 135 135 3135 591 3270 591 +1 1 0 2 0 7 50 0 -1 0.000 0 0.0000 3938 589 135 135 3938 589 4073 589 +# 4 +1 1 0 2 0 7 50 0 -1 0.000 0 0.0000 3938 589 90 90 3938 589 4028 589 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 67 640 157 505 247 775 337 595 382 595 472 595 + 0.000 1.000 1.000 1.000 1.000 0.000 +# 0 -> 1 +3 4 0 2 0 7 50 0 -1 0.000 0 1 0 2 + 1 1 2.00 60.00 60.00 + 747 589 1341 592 + 0.000 0.000 +# 1 -> 2 +3 4 0 2 0 7 50 0 -1 0.000 0 1 0 2 + 1 1 2.00 60.00 60.00 + 1619 597 2179 594 + 0.000 0.000 +# 1 -> 2 +3 4 0 2 0 7 50 0 -1 0.000 0 1 0 2 + 1 1 2.00 60.00 60.00 + 2457 590 3002 590 + 0.000 0.000 +# 1 -> 2 +3 4 0 2 0 7 50 0 -1 0.000 0 1 0 2 + 1 1 2.00 60.00 60.00 + 3284 589 3810 588 + 0.000 0.000 +4 0 0 50 0 0 10 0.0000 4 75 240 885 536 a..z\001 +4 0 0 50 0 12 10 0.0000 4 105 210 3451 538 nl\001 +4 0 0 50 0 12 10 0.0000 4 105 210 2209 190 df\001 +4 0 0 50 0 0 10 0.0000 4 75 45 1832 542 :\001 +4 0 0 50 0 12 10 0.0000 4 105 210 2624 893 df\001 +4 0 0 50 0 0 10 0.0000 4 75 240 1348 184 a..z\001 +4 0 0 50 0 12 10 0.0000 4 75 210 2610 540 cr\001 +4 0 0 50 0 12 10 0.0000 4 75 210 3015 180 cr\001 diff --git a/doc/exsubtr.fig b/doc/exsubtr.fig new file mode 100644 index 0000000..0e35990 --- /dev/null +++ b/doc/exsubtr.fig @@ -0,0 +1,87 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +6 1395 270 3555 630 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1575 450 135 135 1575 450 1710 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2475 450 135 135 2475 450 2610 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3375 450 135 135 3375 450 3510 450 +-6 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 675 1215 135 135 675 1215 810 1215 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2475 1215 90 90 2475 1215 2565 1215 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2475 1215 135 135 2475 1215 2610 1215 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3375 1980 135 135 3375 1980 3510 1980 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2475 1980 135 135 2475 1980 2610 1980 
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1575 1980 135 135 1575 1980 1710 1980 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2475 450 90 90 2475 450 2565 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1575 450 90 90 1575 450 1665 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1575 1980 90 90 1575 1980 1665 1980 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2475 1980 90 90 2475 1980 2565 1980 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 720 1080 1440 450 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 720 1350 1440 1980 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 810 1215 2340 1215 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1665 540 2385 1125 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1665 1890 2385 1305 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 2475 1845 2475 1350 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1710 1980 2340 1980 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 2610 1980 3240 1980 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1710 450 2340 450 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 2610 450 3240 450 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 2475 585 2475 1080 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 3285 540 2565 1125 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 3285 1890 2565 1305 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 135 1260 225 1125 315 1395 405 1215 450 1215 540 1215 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 2610 1260 3015 1260 3150 1260 3150 1170 3015 1170 2610 1170 + 0.000 1.000 1.000 1.000 1.000 0.000 +4 0 0 50 0 0 10 0.0000 4 105 45 990 720 i\001 +4 0 0 50 0 0 10 0.0000 4 105 60 1125 1620 f\001 +4 0 0 50 0 0 10 0.0000 4 135 660 1215 1170 a-e,g-h,j-z\001 +4 0 0 50 0 0 10 0.6807 4 75 195 2880 810 a-z\001 +4 0 0 50 0 0 10 5.6025 4 75 195 
2925 1530 a-z\001 +4 0 0 50 0 0 10 0.0000 4 75 210 2520 720 u-z\001 +4 0 0 50 0 0 10 0.0000 4 105 195 2205 1755 a-q\001 +4 0 0 50 0 0 10 0.0000 4 75 195 2520 1755 s-z\001 +4 0 0 50 0 0 10 0.0000 4 75 180 2205 720 a-s\001 +4 0 0 50 0 0 10 0.0000 4 75 75 1980 1935 o\001 +4 0 0 50 0 0 10 0.0000 4 75 60 2835 1935 r\001 +4 0 0 50 0 0 10 0.0000 4 90 60 2835 405 t\001 +4 0 0 50 0 0 10 0.0000 4 75 75 1935 405 n\001 +4 0 0 50 0 0 10 5.6025 4 105 495 1845 630 a-m,o-z\001 +4 0 0 50 0 0 10 0.6807 4 105 450 1800 1710 a-n,p-z\001 +4 0 0 50 0 0 10 0.0000 4 75 195 2835 1125 a-z\001 diff --git a/doc/opconcat.fig b/doc/opconcat.fig new file mode 100644 index 0000000..312e301 --- /dev/null +++ b/doc/opconcat.fig @@ -0,0 +1,43 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +6 225 180 1530 1080 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 630 135 135 585 630 720 630 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1215 450 90 90 1215 450 1305 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1215 450 135 135 1215 450 1350 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1215 810 90 90 1215 810 1305 810 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1215 810 135 135 1215 810 1350 810 +3 1 0 1 0 7 50 0 -1 0.000 0 0 0 8 + 225 630 495 270 1125 180 1485 270 1530 630 1485 990 + 1125 1080 495 990 + 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 +-6 +6 1980 180 3285 1080 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2970 450 90 90 2970 450 3060 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2970 450 135 135 2970 450 3105 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2340 630 135 135 2340 630 2475 630 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2970 810 135 135 2970 810 3105 810 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2970 810 90 90 2970 810 3060 810 +3 1 0 1 0 7 50 0 -1 0.000 0 0 0 8 + 1980 630 2250 270 2880 180 3240 270 3285 630 3240 990 + 2880 1080 2250 990 + 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 +-6 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1350 810 2205 675 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 
60.00 + 1350 450 2205 585 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 45 675 135 540 225 810 315 630 360 630 450 630 + 0.000 1.000 1.000 1.000 1.000 0.000 +4 0 0 50 0 32 10 0.0000 4 75 75 1710 450 e\001 +4 0 0 50 0 32 10 0.0000 4 75 75 1710 900 e\001 diff --git a/doc/opor.fig b/doc/opor.fig new file mode 100644 index 0000000..7dbb8ca --- /dev/null +++ b/doc/opor.fig @@ -0,0 +1,42 @@ +#FIG 3.2 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +6 0 765 765 1170 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 945 135 135 585 945 720 945 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 45 990 135 855 225 1125 315 945 360 945 450 945 + 0.000 1.000 1.000 1.000 1.000 0.000 +-6 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1215 1440 135 135 1215 1440 1350 1440 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 1260 90 90 1845 1260 1935 1260 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 1260 135 135 1845 1260 1980 1260 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 270 90 90 1845 270 1935 270 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 270 135 135 1845 270 1980 270 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1215 450 135 135 1215 450 1350 450 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 630 135 135 1845 630 1980 630 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 630 90 90 1845 630 1935 630 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 1620 90 90 1845 1620 1935 1620 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 1620 135 135 1845 1620 1980 1620 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 675 855 1125 540 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 675 1035 1125 1350 +3 1 0 1 0 7 50 0 -1 0.000 0 0 0 8 + 855 1440 1125 1080 1755 990 2115 1080 2160 1440 2115 1800 + 1755 1890 1125 1800 + 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 +3 1 0 1 0 7 50 0 -1 0.000 0 0 0 8 + 855 450 1125 90 1755 0 2115 90 2160 450 2115 810 + 1755 900 1125 810 + 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 +4 0 0 50 0 32 10 0.0000 4 75 75 720 1260 e\001 +4 0 0 50 0 32 10 0.0000 4 75 75 720 
720 e\001 diff --git a/doc/opstar.fig b/doc/opstar.fig new file mode 100644 index 0000000..5bac654 --- /dev/null +++ b/doc/opstar.fig @@ -0,0 +1,49 @@ +#FIG 3.2 Produced by xfig version 3.2.5-alpha5 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +6 360 495 1125 900 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 945 675 135 135 945 675 1080 675 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 405 720 495 585 585 855 675 675 720 675 810 675 + 0.000 1.000 1.000 1.000 1.000 0.000 +-6 +6 2070 135 2430 495 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2250 315 90 90 2250 315 2340 315 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2250 315 135 135 2250 315 2385 315 +-6 +6 969 -122 1329 238 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1149 58 90 90 1149 58 1239 58 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1149 58 135 135 1149 58 1284 58 +-6 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1620 495 135 135 1620 495 1755 495 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2250 675 135 135 2250 675 2385 675 +1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2250 675 90 90 2250 675 2340 675 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 1080 630 1485 540 +2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2 + 1 1 2.00 60.00 60.00 + 973 543 1103 203 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 2385 360 2700 630 2700 1215 1980 1395 1260 1125 978 801 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6 + 1 1 2.00 60.00 60.00 + 2385 720 2520 855 2475 1125 1935 1215 1395 1035 1067 730 + 0.000 1.000 1.000 1.000 1.000 0.000 +3 1 0 1 0 7 50 0 -1 0.000 0 0 0 8 + 1260 495 1530 135 2160 45 2520 135 2565 495 2520 855 + 2160 945 1530 855 + 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 +4 0 0 50 0 32 10 0.0000 4 75 75 1845 1125 e\001 +4 0 0 50 0 32 10 0.0000 4 75 75 1845 1440 e\001 +4 0 0 50 0 32 10 0.0000 4 75 75 1156 549 e\001 +4 0 0 50 0 32 10 0.0000 4 75 75 896 442 e\001 diff --git a/doc/ragel-guide.tex b/doc/ragel-guide.tex new file mode 100644 index 0000000..db5f88f --- /dev/null +++ b/doc/ragel-guide.tex 
@@ -0,0 +1,2628 @@ +% +% Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> +% + +% This file is part of Ragel. +% +% Ragel is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 2 of the License, or +% (at your option) any later version. +% +% Ragel is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with Ragel; if not, write to the Free Software +% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +\documentclass[letterpaper,12pt,oneside]{book} +\usepackage{pslatex} +\usepackage{graphics} +\usepackage{comment} +\usepackage{multicol} +\usepackage[medium]{titlesec} + +\topmargin 0in +\oddsidemargin 0in +\textwidth 6.5in +\textheight 8.5in + +\setlength{\parskip}{0pt} +\setlength{\topsep}{0pt} +\setlength{\partopsep}{0pt} +\setlength{\itemsep}{0pt} + +\input{version} + +\newcommand{\verbspace}{\vspace{10pt}} +\newcommand{\graphspace}{\vspace{10pt}} + +\renewcommand\floatpagefraction{.99} +\renewcommand\topfraction{.99} +\renewcommand\bottomfraction{.99} +\renewcommand\textfraction{.01} +\setcounter{totalnumber}{50} +\setcounter{topnumber}{50} +\setcounter{bottomnumber}{50} + +\begin{document} + +% +% Title page +% +\thispagestyle{empty} +\begin{center} +\vspace*{3in} +{\huge Ragel State Machine Compiler}\\ +\vspace*{12pt} +{\Large User Guide}\\ +\vspace{1in} +by\\ +\vspace{12pt} +{\large Adrian Thurston}\\ +\end{center} +\clearpage + +\pagenumbering{roman} + +% +% License page +% +\chapter*{License} +Ragel version \version, \pubdate\\ +Copyright \copyright\ 2003, 2004, 2005, 2006 Adrian Thurston +\vspace{6mm} + +{\bf\it\noindent This document is part of Ragel, 
and as such, this document is +released under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2 of the License, or (at your option) +any later version.} + +\vspace{5pt} + +{\bf\it\noindent Ragel is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details.} + +\vspace{5pt} + +{\bf\it\noindent You should have received a copy of the GNU General Public +License along with Ragel; if not, write to the Free Software Foundation, Inc., +59 Temple Place, Suite 330, Boston, MA 02111-1307 USA} + +% +% Table of contents +% +\clearpage +\tableofcontents +\clearpage + +% +% Chapter 1 +% + +\pagenumbering{arabic} + +\chapter{Introduction} + +\section{Abstract} + +Regular expressions are used heavily in practice for the purpose of specifying +parsers. However, they are normally used as black boxes linked together with +program logic. User actions are associated with entire expressions and matched +text is extracted from input. With these facilities it is not possible to +specify an entire parser with a single regular expression because practical +parsing tasks invariably involve the execution of arbitrary user code +throughout the course of parsing. + +Ragel is a software development tool which allows the user to embed actions into +regular expressions without disrupting the regular expression syntax. +Consequently, one can specify an entire parser using a single regular +expression. The single-expression model affords concise +and elegant descriptions of languages and the generation of very simple, +fast and robust code. Ragel compiles finite state machines from a high level +regular language notation to executable C, C++, Objective-C or D.
+ +In addition to building state machines from regular expressions, Ragel allows +the programmer to directly specify state machines with state charts. These two +notations may also be freely combined. There are facilities for controlling +nondeterminism in the resulting machines and building scanners using the +longest-match paradigm. Ragel can produce code that runs as fast as manually +constructed machines. Ragel can handle integer-sized alphabets and can compile +very large state machines. + +\section{Motivation} + +When a programmer is faced with the task of producing a parser for a +context-free language there are many tools to choose from. It is quite common +to generate useful and efficient parsers for programming languages from a +formal grammar. It is also quite common for programmers to avoid such tools +when making parsers for simple computer languages, such as file formats and +communication protocols. Such languages often meet the criteria for the +regular languages. Tools for processing the context-free languages are simply +too heavyweight for the purpose of parsing regular languages because the extra +run-time effort required for supporting the recursive nature of context-free +languages is wasted. + +Regular expressions are more appropriate than context-free grammars for a large +number of parsing problems. Parsers based on them have many advantages over +hand written parsers. Regular expression syntax is convenient, +concise and easy to maintain. Existing +parsing tools based on regular expressions, such as Lex, Re2C, Sed, Awk and +Perl, are normally split into two levels: a regular expression matching engine +and some kind of program logic for linking patterns together and executing user +code. + +As an example, Lex requires the user to consider a language as a sequence +of independent patterns. +Unfortunately, there are many computer languages that are considered regular, +which do not fit this model.
This model also places restrictions on when action +code may be executed. Since action code can only be associated with complete +patterns, if action code must be executed before an entire pattern is matched +then the pattern must be broken into smaller units. Instead of being forced to +disrupt the regular expression syntax, it is desirable to retain a single +expression and embed code for performing actions directly into the transitions +which move over the characters. After all we know the transitions are there. + +Perl allows one to link patterns together using arbitrary program code. This +is very flexible and powerful, however we can be more concise, clear and robust +if we avoid gluing together regular expressions with if statements and while +loops, and instead only compose parsers with regular expression operators. To +achieve this we require an action execution model for associating code with the +sub-expressions of a regular expression in a way that does not disrupt its +syntax. + +The primary goal of Ragel is therefore to provide developers with an ability to embed +actions into the transitions and states of a regular expression in support of the +definition of entire parsers or large sections of parsers using a single +regular expression that is compiled to a simple state machine. From the +regular expression we gain a clear and concise statement of our language. From +the state machine we obtain a very fast and robust executable that lends itself +to many kinds of analysis and visualization. + +\section{Overview} + +Ragel is a language for specifying state machines. The Ragel program is a +compiler that assembles a state machine definition to executable code. Ragel +is based on the principle that any regular language can be converted to a +deterministic finite state automaton.
Since every regular language has a state +machine representation and vice versa, the terms regular language and state +machine (or just machine) will be used interchangeably in this document. + +Ragel outputs machines to C, C++, Objective-C, or D code. The output is +designed to be generic and is not bound to any particular input or processing +method. A Ragel machine expects to have data passed to it in buffer blocks. +When there is no more input, the machine can be queried for acceptance. In +this way, a Ragel machine can be used to simply recognize a regular language +like a regular expression library. By embedding code into the regular language, +a Ragel machine can also be used to parse input. + +The Ragel input language has many operators for constructing and manipulating +machines. Machines are built up from smaller machines, to bigger ones, to the +final machine representing the language that needs to be recognized or parsed. + +The core state machine construction operators are those found in most ``Theory +of Computation'' textbooks. They date back to the 1950s and are widely studied. +They are based on set operations and permit one to think of languages as a set +of strings. They are Union, Intersection, Subtraction, Concatenation and Kleene +Star. Put together, these operators make up what most people know as regular +expressions. Ragel also provides a longest-match construction for easily +building scanners and provides operators for explicitly constructing machines +using a state chart method. In the state chart method one joins machines +together without any implied transitions and then explicitly specifies where +epsilon transitions should be drawn. + +The state machine manipulation operators are specific to Ragel. They allow the +programmer to access the states and transitions of regular languages. There are +two uses of the manipulation operators. 
The first and primary use is to embed +code into transitions and states, allowing the programmer to specify the +actions of the state machine. + +Following a number of action embeddings, a single transition can have a number +of actions embedded in it. When making a nondeterministic specification into a +DFA using machines that have embedded actions, new transitions are often made +that have the combined actions of several source transitions. Ragel ensures +that multiple actions associated with a single transition are ordered +consistently with respect to the order of reference and the natural ordering +implied by the construction operators. + +The second use of the manipulation operators is to assign priorities in +transitions. Priorities provide a convenient way of controlling any +nondeterminism introduced by the construction operators. Suppose two +transitions leave from the same state and go to distinct target states on the +same character. If these transitions are assigned conflicting priorities, then +during the determinization process the transition with the higher priority will +take precedence over the transition with the lower priority. The lower priority +transition gets abandoned. The transitions would otherwise be combined to a new +transition that goes to a new state which is a combination of the original +target states. Priorities are often required for segmenting machines. The most +common uses of priorities have been encoded into a set of simple operators +which should be used instead of priority embeddings whenever possible. + +There are four operators for embedding actions and priorities into the +transitions of a state machine, these correspond to the different +classes of transitions in a machine. It is possible to embed into start +transitions, finishing transitions, all transitions or pending out +transitions. The embedding of pending out transitions is a special case. 
+These transition embeddings get stored in the final states of a machine. They +are transferred to any transitions that may be made going out of the machine by +a concatenation or Kleene star operator. + +There are several more operators for embedding actions into states. Like the +transition embeddings, there are various different classes of states that the +embedding operators access. For example, one can access start states, final +states or all states, among others. Unlike the transition +embeddings, there +are several different types of state action embeddings. These are executed at various +different times during the processing of input. It is possible to embed +actions which are executed on all transitions into a state, all transitions out of a state, +transitions taken on the error event or on the EOF event. + +Within actions, it is possible to influence the behaviour of the state machine. +The user can write action code that jumps or calls to another portion of the +machine, changes the current character being processed, or breaks out of the +processing loop. With the state machine calling feature Ragel can be used to +parse languages which are not regular. For example, one can parse balanced +parentheses by calling into a parser when an open bracket character is seen and +returning to the state on the top of the stack when the corresponding closing +bracket character is seen. More complicated context-free languages such as +expressions in C, are out of the scope of Ragel. + +Ragel provides a longest-match construction operator which eases the task of +building scanners. This construction behaves much like the primary processing +model of Lex. The generated code, which relies on user-defined variables for +backtracking, repeatedly tries to match patterns to the input, favouring longer +patterns over shorter ones and patterns that appear ahead of others when the +lengths of the possible matches are identical.
When a pattern is matched the +associated action is executed. Longest-match machines take Ragel out of the +domain of pure state machines and require the user to maintain the backtracking +related variables. However, longest-match machines integrate well with regular +state machine instantiations. They can be called to or jumped to only when +needed, or they can be called out of or jumped out of when a simpler, pure +state machine model is needed. + +Two types of output code style are available. Ragel can produce a table-driven +machine or a directly executable machine. The directly executable machine is much +faster than the table-driven. On the other hand, the table-driven machine is +more compact and less demanding on the host language compiler. It is better +suited to compiling large state machines and in the future will be used for +coverage statistics gathering and debugging. + +\section{Related Work} + +Lex is perhaps the best-known tool for constructing parsers from regular +expressions. In the Lex processing model, generated code attempts to match one +of the user's regular expression patterns, favouring longer matches over +shorter ones. Once a match is made it then executes the code associated with +the pattern and consumes the matching string. This process is repeated until +the input is fully consumed. + +Through the use of start conditions, related sets of patterns may be defined. +The active set may be changed at any time. This allows the user to define +different lexical regions. It also allows the user to link patterns together by +requiring that some patterns come before others. This is quite like a +concatenation operation. However, use of Lex for languages that require a +considerable amount of pattern concatenation is inappropriate. In such cases a +Lex program deteriorates into a manually specified state machine, where start +conditions define the states and pattern actions define the transitions. 
Lex +is therefore best suited to parsing tasks where the language to be parsed can +be described in terms of regions of tokens. + +Lex is useful in many scenarios and has undoubtedly stood the test of time. +There are, however, several drawbacks to using Lex. Lex can impose too much +overhead for parsing applications where buffering is not required because all +the characters are available in a single string. In these cases there is +structure to the language to be parsed and a parser specification tool can +help, but employing a heavyweight processing loop that imposes a stream +``pull'' model and dynamic input buffer allocation is inappropriate. An +example of this kind of scenario is the conversion of floating point numbers +contained in a string to their corresponding numerical values. + +Another drawback is that +Lex patterns are black boxes. It is not possible to execute a user action while +matching a character contained inside a pattern. For example, if scanning a +programming language and string literals can contain newlines which must be +counted, a Lex user must break up a string literal pattern so as to associate +an action with newlines. This forces the definition of a new start condition. +Alternatively the user can reprocess the text of the matched string literal to +count newlines. + +\begin{comment} +How ragel is different from Lex. + +%Like Re2c, Ragel provides a simple execution model that does not make any +%assumptions as to how the input is collected. Also, Ragel does not do any +%buffering in the generated code. Consequently there are no dependencies on +%external functions such as \verb|malloc|. + +%If buffering is required it can be manually implemented by embedding actions +%that copy the current character to a buffer, or data can be passed to the +%parser using known block boundaries.
If the longest-match operator is used, +%Ragel requires the user to ensure that the ending portion of the input buffer +%is preserved when the buffer is exhaused before a token is fully matched. The +%user should move the token prefix to a new memory location, such as back to the +%beginning of the input buffer, then place the subsequently read input +%immediately after the prefix. + +%These properties of Ragel make it more work to write a program that requires +%the longest-match operator or buffering of input, however they make Ragel a +%more flexible tool that can produce very simple and fast-running programs under +%a variety of input acquisition arrangements. + +%In Ragel, it is not necessary +%to introduce start conditions to concatenate tokens and retain action +%execution. Ragel allows one to structure a parser as a series of tokens, but +%does not require it. + +%Like Lex and Re2C, Ragel is able to process input using a longest-match +%execution model, however the core of the Ragel language specifies parsers at a +%much lower level. This core is built around a pure state machine model. When +%building basic machines there is no implied algorithm for processing input +%other than to move from state to state on the transitions of the machine. This +%core of pure state machine operations makes Ragel well suited to handling +%parsing problems not based on token scanning. Should one need to use a +%longest-match model, the functionality is available and the lower level state +%machine construction facilities can be used to specify the patterns of a +%longest-match machine. + +%This is not possible in Ragel. One can only program +%a longest-match instantiation with a fixed set of rules. One can jump to +%another longest-match machine that employs the same machine definitions in the +%construction of its rules, however no states will be shared. 
+ +%In Ragel, input may be re-parsed using a +%different machine, but since the action to be executed is associated with +%transitions of the compiled state machine, the longest-match construction does +%not permit a single rule to be excluded from the active set. It cannot be done +%ahead of time nor in the excluded rule's action. +\end{comment} + +The Re2C program defines an input processing model similar to that of Lex. +Unlike Lex, Re2C focuses on making generated state machines run very fast and +integrate easily into any program, free of dependencies. Re2C generates +directly executable code and is able to claim that generated parsers run nearly +as fast as their hand-coded equivalents. This is very important for user +adoption, as programmers are reluctant to use a tool when a faster alternative +exists. A consideration to ease of use is also important because developers +need the freedom to integrate the generated code as they see fit. + +Many scripting languages provide ways of composing parsers by linking regular +expressions using program logic. For example, Sed and Awk are two established +Unix scripting tools that allow the programmer to exploit regular expressions +for the purpose of locating and extracting text of interest. High-level +programming languages such as Perl, Python, PHP and Ruby all provide regular +expression libraries that allow the user to combine regular expressions with +arbitrary code. + +In addition to supporting the linking of regular expressions with arbitrary +program logic, the Perl programming language permits the embedding of code into +regular expressions. Perl embeddings do not translate into the embedding of +code into deterministic state machines. Perl regular expressions are in fact +not fully compiled to deterministic machines when embedded code is involved. +They are instead interpreted and involve backtracking. This is shown by the +following Perl program. 
When it is fed the input \verb|abcd| the interpreter +attempts to match the first alternative, printing \verb|a1 b1|. When this +possibility fails it backtracks and tries the second possibility, printing +\verb|a2 b2|, at which point it succeeds. A similar parser expressed in Ragel +will attempt both of the alternatives concurrently, printing +\verb|a1 a2 b1 b2|. + +\verbspace +\begin{verbatim} +print "YES\n" if ( <STDIN> =~ + /( a (?{ print "a1 "; }) b (?{ print "b1 "; }) cX ) | + ( a (?{ print "a2 "; }) b (?{ print "b2 "; }) cd )/x ) +\end{verbatim} + +\section{Development Status} + +Ragel is a relatively new tool and is under continuous development. As a rough +release guide, minor revision number changes are for implementation +improvements and feature additions. Major revision number changes are for +implementation and language changes that do not preserve backwards +compatibility. Though in the past this has not always held true: changes that +break code have crept into minor version number changes. Typically, the +documentation lags behind the development in the interest of documenting only +the lasting features. The latest changes are always documented in the ChangeLog +file. As Ragel stabilizes, which is expected in the 5.x line, the version +numbering rules will become more strict and the documentation will become more +plentiful. + + +\chapter{Constructing State Machines} + +\section{Ragel State Machine Specifications} + +A Ragel input file consists of a host language code file with embedded machine +specifications. Ragel normally passes input straight to output. When it sees +a machine specification it stops to read the Ragel statements and possibly generate +code in place of the specification. +Afterwards it continues to pass input through. There +can be any number of FSM specifications in an input file. A multi-line FSM spec +starts with \verb|%%{| and ends with \verb|}%%|. A single-line FSM spec starts +with \verb|%%| and ends at the first newline.
+ +While Ragel is looking for FSM specifications it does basic lexical analysis on +the surrounding input. It interprets literal strings and comments so a +\verb|%%| sequence in either of those will not trigger the parsing of an FSM +specification. Ragel does not pass the input through any preprocessor nor does it +interpret preprocessor directives itself so includes, defines and ifdef logic +cannot be used to alter the parse of a Ragel input file. It is therefore not +possible to use an \verb|#if 0| directive to comment out a machine as is +commonly done in C code. As an alternative, a machine can be prevented from +causing any generated output by commenting out the write statements. + +In Figure \ref{cmd-line-parsing}, a multi-line machine is used to define the +machine and single line machines are used to trigger the writing of the machine +data and execution code. + +\begin{figure} +\begin{multicols}{2} +\small +\begin{verbatim} +#include <string.h> +#include <stdio.h> + +%%{ + machine foo; + main := + ( 'foo' | 'bar' ) + 0 @{ res = 1; }; +}%% + +%% write data noerror nofinal; +\end{verbatim} +\columnbreak +\begin{verbatim} +int main( int argc, char **argv ) +{ + int cs, res = 0; + if ( argc > 1 ) { + char *p = argv[1]; + char *pe = p + strlen(p) + 1; + %% write init; + %% write exec; + } + printf("result = %i\n", res ); + return 0; +} +\end{verbatim} +\end{multicols} +\caption{Parsing a command line argument.} +\label{cmd-line-parsing} +\end{figure} + + +\subsection{Naming Ragel Blocks} + +\begin{verbatim} +machine fsm_name; +\end{verbatim} +\verbspace + +The \verb|machine| statement gives the name of the FSM. If present in a +specification, this statement must appear first. If a machine specification +does not have a name then Ragel uses the previous specification name. If no +previous specification name exists then this is an error. Because FSM +specifications persist in memory, a machine's statements can be spread across +multiple machine specifications. 
This allows one to break up a machine across +several files or draw in statements that are common to multiple machines using +the include statement. + +\subsection{Including Ragel Code} + +\begin{verbatim} +include FsmName "inputfile.rl"; +\end{verbatim} +\verbspace + +The \verb|include| statement can be used to draw in the statements of another FSM +specification. Both the name and input file are optional, however at least one +must be given. Without an FSM name, the given input file is searched for an FSM +of the same name as the current specification. Without an input file the +current file is searched for a machine of the given name. If both are present, +the given input file is searched for a machine of the given name. + +\subsection{Machine Definition} +\label{definition} + +\begin{verbatim} +<name> = <expression>; +\end{verbatim} +\verbspace + +The machine definition statement associates an FSM expression with a name. Machine +expressions assigned to names can later be referenced by other expressions. A +definition statement on its own does not cause any states to be generated. It is simply a +description of a machine to be used later. States are generated only when a definition is +instantiated, which happens when a definition is referenced in an instantiated +expression. + +\subsection{Machine Instantiation} +\label{instantiation} + +\begin{verbatim} +<name> := <expression>; +\end{verbatim} +\verbspace + +The machine instantiation statement generates a set of states representing an expression and +associates a name with the entry point. Each instantiation generates a distinct +set of states. At a very minimum the \verb|main| machine must be instantiated. +Other machines may be instantiated and control passed to them by use of +\verb|fcall|, \verb|fgoto| or \verb|fnext| statements. 
+ +\begin{comment} +\subsection{Write Statement} + +\begin{verbatim} +write <component> [options]; +\end{verbatim} +\verbspace + +The write statement is used to generate parts of the machine. There are four +components that can be generated: the state machine's static data, the +initialization code, the execution code and the EOF action execution code. The +write statement is described in detail in Section \ref{write-statement}. +\end{comment} + +\section{Lexical Analysis of an FSM Specification} +\label{lexing} + +Within a machine specification the following lexical rules apply to the parse +of the input. + +\begin{itemize} + +\item The \verb|#| symbol begins a comment that terminates at the next newline. + +\item The symbols \verb|""|, \verb|''|, \verb|//|, \verb|[]| behave as the +delimiters of literal strings. With them, the following escape sequences are interpreted: + +\verb| \0 \a \b \t \n \v \f \r| + +A backslash at the end of a line joins the following line onto the current. A +backslash preceding any other character removes special meaning. This applies +to terminating characters and to special characters in regular expression +literals. As an exception, regular expression literals do not support escape +sequences as the operands of a range within a list. See the bullet on regular +expressions in Section \ref{basic}. + +\item The symbols \verb|{}| delimit a block of host language code that will be +embedded into the machine as an action. Within the block of host language +code, basic lexical analysis of C/C++ comments and strings is done in order to +correctly find the closing brace of the block. With the exception of FSM +commands embedded in code blocks, the entire block is preserved as is for +identical reproduction in the output code. + +\item The pattern \verb|[+-]?[0-9]+| denotes an integer in decimal format. +Integers used for specifying machines may be negative only if the alphabet type +is signed. 
Integers used for specifying priorities may be positive or negative. + +\item The pattern \verb|0x[0-9a-fA-F]+| denotes an integer in hexadecimal +format. + +\item The keywords are \verb|access|, \verb|action|, \verb|alphtype|, +\verb|getkey|, \verb|write|, \verb|machine| and \verb|include|. + +\item The pattern \verb|[a-zA-Z_][a-zA-Z_0-9]*| denotes an identifier. + +%\item The allowable symbols are: +% +%\verb/ ( ) ! ^ * ? + : -> - | & . , := = ; > @ $ % /\\ +%\verb| >/ $/ %/ </ @/ <>/ >! $! %! <! @! <>!|\\ +%\verb| >^ $^ %^ <^ @^ <>^ >~ $~ %~ <~ @~ <>~|\\ +%\verb| >* $* %* <* @* <>*| + +\item Any amount of whitespace may separate tokens. + +\end{itemize} + +%\section{Parse of an FSM Specification} + +%The following statements are possible within an FSM specification. The +%requirements for trailing semicolons loosely follow that of C. +%A block +%specifying code does not require a trailing semicolon. An expression +%statement does require a trailing semicolon. + + +\section{Basic Machines} +\label{basic} + +The basic machines are the base operands of regular language expressions. They +are the smallest unit to which machine construction and manipulation operators +can be applied. + +In the diagrams that follow the symbol \verb|df| represents +the default transition, which is taken if no other transition can be taken. The +symbol \verb|cr| represents the carriage return character, \verb|nl| represents the newline character (aka line feed) and the symbol +\verb|sp| represents the space character. + +\begin{itemize} + +\item \verb|'hello'| -- Concatenation Literal. Produces a machine that matches +the sequence of characters in the quoted string. If there are 5 characters +there will be 6 states chained together with the characters in the string. See +Section \ref{lexing} for information on valid escape sequences.
+ +\begin{center} +\includegraphics{bmconcat} +\end{center} + +It is possible +to make a concatenation literal case-insensitive by appending an \verb|i| to +the string, for example \verb|'cmd'i|. + +\item \verb|"hello"| -- Identical to the single quoted version. + +\item \verb|[hello]| -- Or Expression. Produces a union of characters. There +will be two states with a transition for each unique character between the two states. +The \verb|[]| delimiters behave like the quotes of a literal string. For example, +\verb|[ \t]| means tab or space. The or expression supports character ranges +with the \verb|-| symbol as a separator. The meaning of the union can be negated +using an initial \verb|^| character as in standard regular expressions. +See Section \ref{lexing} for information on valid escape sequences +in or expressions. + +\begin{center} +\includegraphics{bmor} +\end{center} + +\item \verb|''|, \verb|""|, and \verb|[]| -- Zero Length Machine. Produces a machine +that matches the zero length string. Zero length machines have one state that is both +a start state and a final state. + +\begin{center} +\includegraphics{bmnull} +\end{center} + +\item \verb|number| -- Simple Machine. Produces a two state machine with one +transition on the given number. The number may be in decimal or hexadecimal +format and should be in the range allowed by the alphabet type. The minimum and +maximum values permitted are defined by the host machine that Ragel is compiled +on. For example, numbers in a \verb|short| alphabet on an i386 machine should +be in the range \verb|-32768| to \verb|32767|. + +\begin{center} +\includegraphics{bmnum} +\end{center} + +\item \verb|/simple_regex/| -- Regular Expression. Regular expressions are +parsed as a series of expressions that will be concatenated together. Each +concatenated expression +may be a literal character, the any character specified by the \verb|.| +symbol, or a union of characters specified by the \verb|[]| delimiters. 
If the +first character of a union is \verb|^| then it matches any character not in the +list. Within a union, a range of characters can be given by separating the first +and last characters of the range with the \verb|-| symbol. Each +concatenated machine may have repetition specified by following it with the +\verb|*| symbol. The standard escape sequences described in Section +\ref{lexing} are supported everywhere in regular expressions except as the +operands of a range within a list. This notation also supports the \verb|i| +trailing option. Use it to produce case-insensitive machines, as in \verb|/GET/i|. + +Ragel does not support very complex regular expressions because the desired +results can always be achieved using the more general machine construction +operators listed in Section \ref{machconst}. The following diagram shows the +result of compiling \verb|/ab*[c-z].*[123]/|. + +\begin{center} +\includegraphics{bmregex} +\end{center} + +\item \verb|lit .. lit| -- Range. Produces a machine that matches any +character in the specified range. Allowable upper and lower bounds of the +range are concatenation literals of length one and number literals. For +example, \verb|0x10..0x20|, \verb|0..63|, and \verb|'a'..'z'| are valid ranges. +The bounds should be in the range allowed by the alphabet type. + +\begin{center} +\includegraphics{bmrange} +\end{center} + +\item \verb|variable_name| -- Look up the machine definition assigned to the +variable name given and use an instance of it. See Section \ref{definition} for +an important note on what it means to reference a variable name. + +\item \verb|builtin_machine| -- There are several built-in machines available +for use. They are all two state machines for the purpose of matching common +classes of characters. They are: + +\begin{itemize} + +\item \verb|any | -- Any character in the alphabet. + +\item \verb|ascii | -- ASCII characters. \verb|0..127| + +\item \verb|extend| -- ASCII extended characters.
This is the range +\verb|-128..127| for signed alphabets and the range \verb|0..255| for unsigned +alphabets. + +\item \verb|alpha | -- Alphabetic characters. \verb|[A-Za-z]| + +\item \verb|digit | -- Digits. \verb|[0-9]| + +\item \verb|alnum | -- Alpha numerics. \verb|[0-9A-Za-z]| + +\item \verb|lower | -- Lowercase characters. \verb|[a-z]| + +\item \verb|upper | -- Uppercase characters. \verb|[A-Z]| + +\item \verb|xdigit| -- Hexadecimal digits. \verb|[0-9A-Fa-f]| + +\item \verb|cntrl | -- Control characters. \verb|0..31| + +\item \verb|graph | -- Graphical characters. \verb|[!-~]| + +\item \verb|print | -- Printable characters. \verb|[ -~]| + +\item \verb|punct | -- Punctuation. Graphical characters that are not alphanumerics. +\verb|[!-/:-@[-`{-~]| + +\item \verb|space | -- Whitespace. \verb|[\t\v\f\n\r ]| + +\item \verb|zlen | -- Zero length string. \verb|""| + +\item \verb|empty | -- Empty set. Matches nothing. \verb|^any| + +\end{itemize} +\end{itemize} + +\section{Operator Precedence} +The following table shows operator precedence from lowest to highest. Operators +in the same precedence group are evaluated from left to right. + +\verbspace +\begin{tabular}{|c|c|c|} +\hline +1&\verb| , |&Join\\ +\hline +2&\verb/ | & - --/&Union, Intersection and Subtraction\\ +\hline +3&\verb| . <: :> :>> |&Concatenation\\ +\hline +4&\verb| : |&Label\\ +\hline +5&\verb| -> |&Epsilon Transition\\ +\hline +&\verb| > @ $ % |&Transition Actions and Priorities\\ +\cline{2-3} +&\verb| >/ $/ %/ </ @/ <>/ |&EOF Actions\\ +\cline{2-3} +6&\verb| >! $! %! <! @! <>! |&Global Error Actions\\ +\cline{2-3} +&\verb| >^ $^ %^ <^ @^ <>^ |&Local Error Actions\\ +\cline{2-3} +&\verb| >~ $~ %~ <~ @~ <>~ |&To-State Actions\\ +\cline{2-3} +&\verb| >* $* %* <* @* <>* |&From-State Actions\\ +\hline +7&\verb| * ** ? + {n} {,n} {n,} {n,m} |&Repetition\\ +\hline +8&\verb| ! 
^ |&Negation and Character-Level Negation\\ +\hline +9&\verb| ( <expr> ) |&Grouping\\ +\hline +\end{tabular} + +\section{Regular Language Operators} +\label{machconst} + +When using Ragel it is helpful to have a sense of how it constructs machines. +Sometimes the determinization process can cause results that appear unusual to someone +unfamiliar with it. Ragel does not make use of any nondeterministic +intermediate state machines. All operators accept and return deterministic +machines. However, to ease the discussion, the operations are defined in terms of +epsilon transitions. + +To draw an epsilon transition between two states \verb|x| and \verb|y| is to +copy all of the properties of \verb|y| into \verb|x|. This involves drawing in +all of \verb|y|'s to-state actions, EOF actions, etc., as well as its +transitions. If \verb|x| and \verb|y| both have a transition out on the same +character, then the transitions must be combined. During transition +combination a new transition is made which goes to a new state that is the +combination of both target states. The new combination state is created using +the same epsilon transition method. The new state has an epsilon transition +drawn to all the states that compose it. Since every time an epsilon transition +is drawn the creation of new epsilon transitions may be triggered, the process +of drawing epsilon transitions is repeated until there are no more epsilon +transitions to be made. + +A very common error that is made when using Ragel is to make machines that do +too much at once. That is, to create machines that have unintentional +nondeterminism. This usually results from being unaware of the common strings +between machines that are combined together using the regular language +operators. This can involve never leaving a machine, causing its actions to be +propagated through all the following states. Or it can involve an alternation +where both branches are unintentionally taken simultaneously.
+ +This problem forces one to think hard about the language that needs to be +matched. To guard against this kind of problem one must ensure that the machine +specification is divided up using boundaries that do not allow ambiguities from +one portion of the machine to the next. See Chapter +\ref{controlling-nondeterminism} for more on this problem and how to solve it. + +The Graphviz tool is an immense help when debugging improperly compiled +machines or otherwise learning how to use Ragel. In many cases, practical +parsing programs will be too large to completely visualize with Graphviz. The +proper approach is to reduce the language to the smallest subset possible that +still exhibits the characteristics that one wishes to learn about or to fix. +This can be done without modifying the source code using the \verb|-M| and +\verb|-S| options at the frontend. If a machine cannot be easily reduced, +embeddings of unique actions can be very useful for tracing a +particular component of a larger machine specification, since action names are +written out on transition labels. + +\subsection{Union} + +\verb/expr | expr/ +\verbspace + +The union operation produces a machine that matches any string in machine one +or machine two. The operation first creates a new start state. Epsilon +transitions are drawn from the new start state to the start states of both +input machines. The resulting machine has a final state set equivalent to the +union of the final state sets of both input machines. In this operation, there +is the opportunity for nondeterminism among both branches. If there are +strings, or prefixes of strings that are matched by both machines then the new +machine will follow both parts of the alternation at once. The union operation is +shown below. + +\graphspace +\begin{center} +\includegraphics{opor} +\end{center} +\graphspace + +The following example demonstrates the union of three machines representing +common tokens. 
+ +\verbspace +\begin{verbatim} +# Hex digits, decimal digits, or identifiers +main := '0x' xdigit+ | digit+ | alpha alnum*; +\end{verbatim} +\verbspace + +\graphspace +\begin{center} +\includegraphics{exor} +\end{center} + +\subsection{Intersection} + +\verb|expr & expr| +\verbspace + +Intersection produces a machine that matches any +string which is in both machine one and machine two. To achieve intersection, a +union is performed on the two machines. After the result has been made +deterministic, any final state that is not a combination of final states from +both machines has its final state status revoked. To complete the operation, +paths that do not lead to a final state are pruned from the machine. Therefore, +if there are any such paths in either of the expressions they will be removed +by the intersection operator. Intersection can be used to require that two +independent patterns be simultaneously satisfied as in the following example. + +\verbspace +\begin{verbatim} +# Match lines four characters wide that contain +# words separated by whitespace. +main := + /[^\n][^\n][^\n][^\n]\n/* & + (/[a-z][a-z]*/ | [ \n])**; +\end{verbatim} +\verbspace + +\graphspace +\begin{center} +\includegraphics{exinter} +\end{center} + +\subsection{Difference} + +\verb|expr - expr| +\verbspace + +The difference operation produces a machine that matches +strings which are in machine one but which are not in machine two. To achieve subtraction, +a union is performed on the two machines. After the result has been made +deterministic, any final state that came from machine two or is a combination +of states involving a final state from machine two has its final state status +revoked. As with intersection, the operation is completed by pruning any path +that does not lead to a final state. The following example demonstrates the +use of subtraction to exclude specific cases from a set. + +\verbspace +\begin{verbatim} +# Subtract keywords from identifiers. 
+main := /[a-z][a-z]*/ - ( 'for' | 'int' ); +\end{verbatim} +\verbspace + +\graphspace +\begin{center} +\includegraphics{exsubtr} +\end{center} +\graphspace + +\subsection{Strong Difference} +\label{strong_difference} + +\verb|expr -- expr| +\verbspace + +Strong difference produces a machine that matches any string of the first +machine which does not have any string of the second machine as a substring. In +the following example, strong subtraction is used to exclude \verb|CRLF| from +a sequence. + +\verbspace +\begin{verbatim} +crlf = '\r\n'; +main := [a-z]+ ':' ( any* -- crlf ) crlf; +\end{verbatim} +\verbspace + +\begin{center} +\includegraphics{exstrongsubtr} +\end{center} +\graphspace + +This operator is equivalent to the following. + +\verbspace +\begin{verbatim} +expr - ( any* expr any* ) +\end{verbatim} + +\subsection{Concatenation} + +\verb|expr . expr| +\verbspace + +Concatenation produces a machine that matches all the strings in machine one followed by all +the strings in machine two. Concatenation draws epsilon transitions from the +final states of the first machine to the start state of the second machine. The +final states of the first machine lose their final state status, unless the +start state of the second machine is final as well. +Concatenation is the default operator. Two machines next to each other with no +operator between them result in the machines being concatenated together. + +\graphspace +\begin{center} +\includegraphics{opconcat} +\end{center} +\graphspace + +The opportunity for nondeterministic behaviour results from the possibility of +the final states of the first machine accepting a string which is also accepted +by the start state of the second machine. +The most common scenario that this happens in is the +concatenation of a machine that repeats some pattern with a machine that gives +a termination string, but the repetition machine does not exclude the +termination string.
The example in Section \ref{strong_difference} +guards against this. Another example is the expression \verb|("'" any* "'")|. +When executed, the thread of control will +never leave the \verb|any*| machine. This is a problem especially if actions +are embedded to process the characters of the \verb|any*| component. + +In the following example, the first machine is always active due to the +nondeterministic nature of concatenation. This particular nondeterminism is intended, +however, because we wish to permit EOF strings before the end of the input. + +\verbspace +\begin{verbatim} +# Require an eof marker on the last line. +main := /[^\n]*\n/* . 'EOF\n'; +\end{verbatim} +\verbspace + +\begin{center} +\includegraphics{exconcat} +\end{center} +\graphspace + +\noindent {\bf Note:} There is a language +ambiguity involving concatenation and subtraction. Because concatenation is the +default operator for two +adjacent machines there is an ambiguity between subtraction of +a positive numerical literal and concatenation of a negative numerical literal. +For example, \verb|(x-7)| could be interpreted as \verb|(x . -7)| or +\verb|(x - 7)|. In the Ragel language, the subtraction operator always takes precedence +over concatenation of a negative literal. Precedence was given to the +subtraction-based interpretation so as to adhere to the rule that the default +concatenation operator takes effect only when there are no other operators between +two machines. Beware of writing machines such as \verb|(any -1)| when what is +desired is a concatenation of \verb|any| and -1. Instead write +\verb|(any . -1)| or \verb|(any (-1))|. If in doubt of the meaning of your program do not +rely on the default concatenation operator; always use the \verb|.| symbol. + + +\subsection{Kleene Star} + +\verb|expr*| +\verbspace + +The machine resulting from the Kleene Star operator will match zero or more +repetitions of the machine it is applied to.
+It creates a new start state and an additional final +state. Epsilon transitions are drawn between the new start state and the old start +state, between the new start state and the new final state, and +between the final states of the machine and the new start state. After the +machine is made deterministic the effect is of the final states getting all the +transitions of the start state. + +\graphspace +\begin{center} +\includegraphics{opstar} +\end{center} +\graphspace + +The possibility for nondeterministic behaviour arises if the final states have +transitions on any of the same characters as the start state. This is common +when applying kleene star to an alternation of tokens. Like the other problems +arising from nondeterministic behaviour, this is discussed in more detail in Chapter +\ref{controlling-nondeterminism}. This particular problem can also be solved +by using the longest-match construction discussed in Section +\ref{generating-scanners} on scanners. + +In the following simple +example, there is no nondeterminism introduced by the exterior kleene star due to +the newline at the end of the regular expression. Without the newline the +exterior kleene star would be redundant and there would be ambiguity between +repeating the inner range of the regular expression and the entire regular +expression. Though it would not cause a problem in this case, unnecessary +nondeterminism in the kleene star operator often causes undesired results for +new Ragel users and must be guarded against. + +\verbspace +\begin{verbatim} +# Match any number of lines with only lowercase letters. +main := /[a-z]*\n/*; +\end{verbatim} +\verbspace + +\begin{center} +\includegraphics{exstar} +\end{center} + +\subsection{One Or More Repetition} + +\verb|expr+| +\verbspace + +This operator produces the concatenation of the machine with the kleene star of +itself. The result will match one or more repetitions of the machine. The plus +operator is equivalent to \verb|(expr . expr*)|.
The plus operator makes +repetitions that cannot be zero length. + +\verbspace +\begin{verbatim} +# Match alpha-numeric words. +main := alnum+; +\end{verbatim} +\verbspace + +\begin{center} +\includegraphics{explus} +\end{center} + +\subsection{Optional} + +\verb|expr?| +\verbspace + +The {\em optional} operator produces a machine that accepts the machine +given or the zero length string. The optional operator is equivalent to +\verb/(expr | '' )/. In the following example the optional operator is used to +extend a token. + +\verbspace +\begin{verbatim} +# Match integers or floats. +main := digit+ ('.' digit+)?; +\end{verbatim} +\verbspace + +\begin{center} +\includegraphics{exoption} +\end{center} + +\subsection{Repetition} + +\begin{tabbing} +\noindent \verb|expr {n}| \hspace{16pt}\=-- Exactly N copies of expr.\\ + +\noindent \verb|expr {,n}| \>-- Zero to N copies of expr.\\ + +\noindent \verb|expr {n,}| \>-- N or more copies of expr.\\ + +\noindent \verb|expr {n,m}| \>-- N to M copies of expr. +\end{tabbing} + +\subsection{Negation} + +\verb|!expr| +\verbspace + +Negation produces a machine that matches any string not matched by the given +machine. Negation is equivalent to \verb|(any* - expr)|. + +\verbspace +\begin{verbatim} +# Accept anything but a string beginning with a digit. +main := ! ( digit any* ); +\end{verbatim} +\verbspace + +\begin{center} +\includegraphics{exnegate} +\end{center} + +\subsection{Character-Level Negation} + +\verb|^expr| +\verbspace + +Character-level negation produces a machine that matches any single character +not matched by the given machine. Character-Level Negation is equivalent to +\verb|(any - expr)|. + +\section{State Charts} + +It is not uncommon for programmers to implement +parsers as manually-coded state machines, either using a switch statement or a +state map compiler which takes a list of states, transitions and actions, and +generates code. 
+ +This method can be a very effective programming technique for producing robust +code. The key disadvantage becomes clear when one attempts to comprehend such a +parser. Machines coded in this way usually require many lines, causing logic to +be spread out over large distances in the source file. Remembering the function +of a large number of states can be difficult and organizing the parser in a +sensible way requires discipline because branches and repetition present many +file layout options. This kind of programming takes a specification with +inherent structure such as looping, alternation and concatenation and expresses +it in a flat form. + +If we could take an isolated component of a manually programmed state chart, +that is, a subset of states that has only one entry point, and implement it +using regular language operators then we could eliminate all the explicit +naming of the states contained in it. By eliminating explicitly named states +and replacing them with higher-level specifications we simplify a parser +specification. + +For example, sometimes chains of states are needed, with only a small number of +possible characters appearing along the chain. These can easily be replaced +with a concatenation of characters. Sometimes a group of common states +implements a loop back to another single portion of the machine. Rather than +manually duplicate all the transitions that loop back, we may be able to +express the loop using a kleene star operator. + +Ragel allows one to take this state map simplification approach. We can build +state machines using a state map model and implement portions of the state map +using regular languages. In place of any transition in the state machine, +entire sub-state machines can be given. These can encapsulate functionality +defined elsewhere. An important aspect of the Ragel approach is that when we +wrap up a collection of states using a regular expression we do not lose +access to the states and transitions.
We can still execute code on the +transitions that we have encapsulated. + +\subsection{Join} + +\verb|expr , expr , ...| +\verbspace + +Joins a list of machines together without +drawing any transitions, without setting up a start state, and without +designating any final states. Transitions between the machines may be specified +using labels and epsilon transitions. The start state must be explicitly +specified with the ``start'' label. Final states may be specified with an +epsilon transition to the implicitly created ``final'' state. The join +operation allows one to build machines using a state chart model. + +\subsection{Label} + +\verb|label: expr| +\verbspace + +Attaches a label to an expression. Labels can be +used as the target of epsilon transitions and explicit control transfer +statements such as \verb|fgoto| and \verb|fnext| in action +code. + +\subsection{Epsilon} + +\verb|expr -> label| +\verbspace + +Draws an epsilon transition to the state defined +by \verb|label|. Epsilon transitions are made deterministic when join +operators are evaluated. Epsilon transitions that are not in a join operation +are made deterministic when the machine definition that contains the epsilon is +complete. See Section \ref{labels} for information on referencing labels. + + +\section{Scanners} +\label{generating-scanners} + +The longest-match operator can be used to construct scanners. The generated +machine repeatedly attempts to match one of the given patterns, first favouring +longer pattern matches over shorter ones. If there is a choice between equal +length matches, the match of the pattern which appears first is chosen. + +\verbspace +\begin{verbatim} +<machine_name> := |* + pattern1 => action1; + pattern2 => action2; + ... + *|; +\end{verbatim} +\verbspace + +The longest-match construction operator is not a pure state machine operator.
+It relies on the \verb|tokstart|, \verb|tokend| and \verb|act| variables to be +present so that it can backtrack and make pointers to the matched text +available to the user. If input is processed using multiple calls to the +execute code then the user must ensure that when a token is only partially +matched the prefix is preserved on the subsequent invocation of the +execute code. + +The \verb|tokstart| variable must be defined as a pointer to the input data. +It is used for recording where the current token match begins. This variable +may be used in action code for retrieving the text of the current match. Ragel +ensures that in between tokens and outside of the longest-match machines +this pointer is set to null. In between calls to the execute code the user must +check if \verb|tokstart| is set and if so, ensure that the data it points to is +preserved ahead of the next buffer block. This is described in more detail +below. + +The \verb|tokend| variable must also be defined as a pointer to the input data. +It is used for recording where a match ends and where scanning of the next +token should begin. This can also be used in action code for retrieving the +text of the current match. + +The \verb|act| variable must be defined as an integer type. It is used for +recording the identity of the last pattern matched when the scanner must go +past a matched pattern in an attempt to make a longer match. If the longer +match fails it may need to consult the act variable. In some cases use of the act +variable can be avoided because the value of the current state is enough +information to determine which token to accept; however, in other cases this is +not enough and so the \verb|act| variable is used. + +When the longest-match operator is in use, the user's driver code must take on +some buffer management functions. The following algorithm gives an overview of +the steps that should be taken to properly use the longest-match operator.
+ +\begin{itemize} +\setlength{\parskip}{0pt} +\item Read a block of input data. +\item Run the execute code. +\item If \verb|tokstart| is set, the execute code will expect the incomplete +token to be preserved ahead of the buffer on the next invocation of the execute +code. +\begin{itemize} +\item Shift the data beginning at \verb|tokstart| and ending at \verb|pe| to the +beginning of the input buffer. +\item Reset \verb|tokstart| to the beginning of the buffer. +\item Shift \verb|tokend| by the distance from the old value of \verb|tokstart| +to the new value. The \verb|tokend| variable may or may not be valid. There is +no way to know if it holds a meaningful value because it is not kept at null +when it is not in use. It can be shifted regardless. +\end{itemize} +\item Read another block of data into the buffer, immediately following any +preserved data. +\item Run the scanner on the new data. +\end{itemize} + +Figure \ref{preserve_example} shows the required handling of an input stream in +which a token is broken by the input block boundaries. After processing up to +and including the ``t'' of ``characters'', the prefix of the string token must be +retained and processing should resume at the ``e'' on the next iteration of +the execute code. + +If one uses a large input buffer for collecting input then the number of times +the shifting must be done will be small. Furthermore, if one takes care not to +define tokens that are allowed to be very long and instead processes these +items using pure state machines or sub-scanners, then only a small amount of +data will ever need to be shifted. + +\begin{figure} +\begin{verbatim} + a) A stream "of characters" to be scanned. + | | | + p tokstart pe + + b) "of characters" to be scanned. + | | | + tokstart p pe +\end{verbatim} +\caption{Following an invocation of the execute code there may be a partially +matched token (a). 
The data of the partially matched token +must be preserved ahead of the new data on the next invocation (b).} +\label{preserve_example} +\end{figure} + +Since scanners attempt to make the longest possible match of input, in some +cases they are not able to identify a token upon parsing its final character; +they must wait for a lookahead character. For example if trying to match words, +the token match must be triggered on following whitespace in case more +characters of the word have yet to come. The user must therefore arrange for an +EOF character to be sent to the scanner to flush out any token that has not yet +been matched. The user can exclude a single character from the entire scanner +and use this character as the EOF character, possibly specifying an EOF action. +For most scanners, zero is a suitable choice for the EOF character. + +Alternatively, if whitespace is not significant and ignored by the scanner, the +final real token can be flushed out by simply sending an additional whitespace +character on the end of the stream. If the real stream ends with whitespace +then it will simply be extended and ignored. If it does not, then the last real token is +guaranteed to be flushed and the dummy EOF whitespace ignored. +An example scanner processing loop is given in Figure \ref{scanner-loop}. + +\begin{figure} +\small +\begin{verbatim} + int have = 0; + bool done = false; + while ( !done ) { + /* How much space is in the buffer? */ + int space = BUFSIZE - have; + if ( space == 0 ) { + /* Buffer is full. */ + cerr << "TOKEN TOO BIG" << endl; + exit(1); + } + + /* Read in a block after any data we already have. */ + char *p = inbuf + have; + cin.read( p, space ); + int len = cin.gcount(); + + /* If no data was read, send the EOF character. */ + if ( len == 0 ) { + p[0] = 0, len++; + done = true; + } + + char *pe = p + len; + %% write exec; + + if ( cs == RagelScan_error ) { + /* Machine failed before finding a token.
*/ + cerr << "PARSE ERROR" << endl; + exit(1); + } + + if ( tokstart == 0 ) + have = 0; + else { + /* There is a prefix to preserve, shift it over. */ + have = pe - tokstart; + memmove( inbuf, tokstart, have ); + tokend = inbuf + (tokend-tokstart); + tokstart = inbuf; + } + } +\end{verbatim} +\caption{A processing loop for a scanner.} +\label{scanner-loop} +\end{figure} + + +\section{Write Statement} +\label{write-statement} + +\begin{verbatim} +write <component> [options]; +\end{verbatim} +\verbspace + + +The write statement is used to generate parts of the machine. +There are four +components that can be generated by a write statement. These components are the +state machine's data, initialization code, execution code and EOF action +execution code. A write statement may appear before a machine is fully defined. +This allows one to write out the data first then later define the machine where +it is used. An example of this is shown in Figure \ref{fbreak-example}. + +\subsection{Write Data} +\begin{verbatim} +write data [options]; +\end{verbatim} +\verbspace + +The write data statement causes Ragel to emit the constant static data needed +by the machine. In table-driven output styles (see Section \ref{genout}) this +is a collection of arrays that represent the states and transitions of the +machine. In goto-driven machines much less data is emitted. At the very +minimum a start state \verb|name_start| is generated. All variables written +out in machine data have both the \verb|static| and \verb|const| properties and +are prefixed with the name of the machine and an +underscore. The data can be placed inside a class, inside a function, or it can +be defined as global data. + +Two variables are written that may be used to test the state of the machine +after a buffer block has been processed. The \verb|name_error| variable gives +the id of the state that the machine moves into when it cannot find a valid +transition to take.
The machine immediately breaks out of the processing loop when +it finds itself in the error state. The error variable can be compared to the +current state to determine if the machine has failed to parse the input. If the +machine is complete, that is from every state there is a transition to a proper +state on every possible character of the alphabet, then no error state is required +and this variable will be set to -1. + +The \verb|name_first_final| variable stores the id of the first final state. All of the +machine's states are sorted by their final state status before having their ids +assigned. Checking if the machine has accepted its input can then be done by +checking if the current state is greater-than or equal to the first final +state. + +Data generation has several options: + +\begin{itemize} +\item \verb|noerror| - Do not generate the integer variable that gives the +id of the error state. +\item \verb|nofinal| - Do not generate the integer variable that gives the +id of the first final state. +\item \verb|noprefix| - Do not prefix the variable names with the name of the +machine. +\end{itemize} + +\subsection{Write Init} +\begin{verbatim} +write init; +\end{verbatim} +\verbspace + +The write init statement causes Ragel to emit initialization code. This should +be executed once before the machine is started. At a very minimum this sets the +current state to the start state. If other variables are needed by the +generated code, such as call +stack variables or longest-match management variables, they are also +initialized here. + +\subsection{Write Exec} +\begin{verbatim} +write exec [options]; +\end{verbatim} +\verbspace + +The write exec statement causes Ragel to emit the state machine's execution code. +Ragel expects several variables to be available to this code. 
At a very minimum, the +generated code needs access to the current character position \verb|p|, the ending +position \verb|pe| and the current state \verb|cs|, though \verb|pe| +can be excluded by specifying the \verb|noend| write option. +The \verb|p| variable is the cursor that the execute code will +use to traverse the input. The \verb|pe| variable should be set up to point to one +position past the last valid character in the buffer. + +Other variables are needed when certain features are used. For example using +the \verb|fcall| or \verb|fret| statements requires \verb|stack| and +\verb|top| variables to be defined. If a longest-match construction is used, +variables for managing backtracking are required. + +The write exec statement has one option. The \verb|noend| option tells Ragel +to generate code that ignores the end position \verb|pe|. In this +case the user must explicitly break out of the processing loop using +\verb|fbreak|; otherwise the machine will continue to process characters until +it moves into the error state. This option is useful if one wishes to process a +null terminated string. Rather than traverse the string to discover the length +before processing the input, the user can break out when the null character is +seen. The example in Figure \ref{fbreak-example} shows the use of the +\verb|noend| write option and the \verb|fbreak| statement for processing a string.
+ +\begin{figure} +\small +\begin{verbatim} +#include <stdio.h> +%% machine foo; +int main( int argc, char **argv ) +{ + %% write data noerror nofinal; + int cs, res = 0; + if ( argc > 1 ) { + char *p = argv[1]; + %%{ + main := + [a-z]+ + 0 @{ res = 1; fbreak; }; + write init; + write exec noend; + }%% + } + printf("execute = %i\n", res ); + return 0; +} +\end{verbatim} +\caption{Use of {\tt noend} write option and the {\tt fbreak} statement for +processing a string.} +\label{fbreak-example} +\end{figure} + + +\subsection{Write EOF Actions} +\begin{verbatim} +write eof; +\end{verbatim} +\verbspace + +The write EOF statement causes Ragel to emit code that executes EOF actions. +This write statement is only relevant if EOF actions have been embedded, +otherwise it does not generate anything. The EOF action code requires access to +the current state. + +\section{Referencing Names} +\label{labels} + +This section describes how to reference names in epsilon transitions and +action-based control-flow statements such as \verb|fgoto|. There is a hierarchy +of names implied in a Ragel specification. At the top level are the machine +instantiations. Beneath the instantiations are labels and references to machine +definitions. Beneath those are more labels and references to definitions, and +so on. + +Any name reference may contain multiple components separated with the \verb|::| +compound symbol. The search for the first component of a name reference is +rooted at the join expression that the epsilon transition or action embedding +is contained in. If the name reference is not contained in a join, +the search is rooted at the machine definition that the epsilon transition or +action embedding is contained in. Each component after the first is searched +for beginning at the location in the name tree that the previous reference +component refers to.
+ +In the case of action-based references, if the action is embedded more than +once, the local search is performed for each embedding and the result is the +union of all the searches. If no result is found for action-based references then +the search is repeated at the root of the name tree. Any action-based name +search may be forced into a strictly global search by prefixing the name +reference with \verb|::|. + +The final component of the name reference must resolve to a unique entry point. +If a name is unique in the entire name tree it can be referenced as is. If it +is not unique it can be specified by qualifying it with names above it in the +name tree. However, it can always be renamed. + +% FIXME: Should fit this in somewhere. +% Some kinds of name references are illegal. Cannot call into longest-match +% machine, can only call its start state. Cannot make a call to anywhere from +% any part of a longest-match machine except a rule's action. This would result +% in an eventual return to some point inside a longest-match other than the +% start state. This is banned for the same reason a call into the LM machine is +% banned. + +\section{State Machine Minimization} + +State machine minimization is the process of finding the minimal equivalent FSM accepting +the language. Minimization reduces the number of states in machines +by merging equivalent states. It does not change the behaviour of the machine +in any way. It will cause some states to be merged into one because they are +functionally equivalent. State minimization is on by default. It can be turned +off with the \verb|-n| option. + +The algorithm implemented is similar to Hopcroft's state minimization +algorithm. Hopcroft's algorithm assumes a finite alphabet that can be listed in +memory, whereas Ragel supports arbitrary integer alphabets that cannot be +listed in memory. 
Though exact analysis is very difficult, Ragel minimization +runs close to $O(n \times \log(n))$ and requires $O(n)$ temporary storage where +$n$ is the number of states. + +\chapter{User Actions} + +\section{Embedding Actions} + +\begin{verbatim} +action ActionName { + /* Code an action here. */ + count += 1; +} +\end{verbatim} +\verbspace + +The action statement defines a block of code that can be embedded into an FSM. +Action names can be referenced by the action embedding operators in +expressions. Though actions need not be named in this way (literal blocks +of code can be embedded directly when building machines), defining reusable +blocks of code whenever possible is good practice because it potentially increases the +degree to which the machine can be minimized. Within an action some Ragel expressions +and statements are parsed and translated. These allow the user to interact with the machine +from action code. See Section \ref{vals} for a complete list of statements and +values available in code blocks. + +\subsection{Entering Action} + +\verb|expr > action| +\verbspace + +The entering operator embeds an action into the starting transitions. The +action is executed on all transitions that enter into the machine from the +start state. If the start state is a final state then it is possible for the +machine to never be entered and the starting transitions bypassed. In the +following example, the action is executed on the first transition of the +machine. If the repetition machine is bypassed the action is not executed. + +\verbspace +\begin{verbatim} +# Execute A at the beginning of a string of alpha. +main := ( lower* >A ) . ' '; +\end{verbatim} + +\begin{center} +\includegraphics{exstact} +\end{center} + +\subsection{Finishing Action} + +\verb|expr @ action| +\verbspace + +The finishing action operator embeds an action into any transitions that go into a +final state. Whether or not the machine accepts is not determined at the point +the action is executed.
Further input may move the machine out of the accepting +state, but keep it in the machine. As in the following example, the +into-final-state operator is most often used when no lookahead is necessary. + +\verbspace +\begin{verbatim} +# Execute A when the trailing space is seen. +main := ( lower* ' ' ) @A; +\end{verbatim} +\verbspace + +\begin{center} +\includegraphics{exdoneact} +\end{center} + +\subsection{All Transition Action} + +\verb|expr $ action| +\verbspace + +The all transition operator embeds an action into all transitions of a machine. +The action is executed whenever a transition of the machine is taken. In the +following example, A is executed on every character matched. + +\verbspace +\begin{verbatim} +# Execute A on any characters of machine one or two. +main := ( 'm1' | 'm2' ) $A; +\end{verbatim} +\verbspace + +\begin{center} +\includegraphics{exallact} +\end{center} + +\subsection{Pending Out Actions} +\label{out-actions} + +\verb|expr % action| +\verbspace + +The pending out action operator embeds an action into the pending out +transitions of a machine. The action is first embedded into the final states of +the machine and later transferred to any transitions made going out of the +machine. The transfer can be caused either by a concatenation or kleene star +operation. This mechanism allows one to associate an action with the +termination of a sequence, without being concerned about what particular +character terminates the sequence. In the following example, A is executed +when leaving the alpha machine by the newline character. + +\verbspace +\begin{verbatim} +# Match a word followed by a newline. Execute A when +# finishing the word. +main := ( lower+ %A ) . 
'\n'; +\end{verbatim} +\verbspace + +\begin{center} +\includegraphics{exfinact} +\end{center} +\graphspace + + +In the following example, the \verb|term_word| action could be used to register +the appearance of a word and to clear the buffer that the \verb|lower| action used +to store the text of it. + +\verbspace +\begin{verbatim} +word = ( [a-z] @lower )+ %term_word; +main := word ( ' ' @space word )* '\n' @newline; +\end{verbatim} +\verbspace + +% FIXME: add +%\begin{center} +%\includegraphics[scale=0.4]{outact.ps} +%\end{center} + +In this final example of the action embedding operators, A is executed upon +entering the alpha machine, B is executed on all transitions of the alpha +machine, C is executed when the alpha machine accepts by moving into the +newline machine and N is executed when the newline machine moves into a final +state. + +\verbspace +\begin{verbatim} +# Execute A on starting the alpha machine, B on every transition +# moving through it and C upon finishing. Execute N on the newline. +main := ( lower* >A $B %C ) . '\n' @N; +\end{verbatim} +\verbspace + +\begin{center} +\includegraphics{exaction} +\end{center} + +\section{State Action Embedding Operators} + +The state embedding operators allow one to embed actions into states. Like the +transition embedding operators, there are several different classes of states +that the operators access. The meanings of the symbols are partially related to +the meanings of the symbols used by the transition embedding operators. + +The state embedding operators are different from the transition embedding +operators in that there are various kinds of events that embedded actions can +be associated with, requiring them to be distinguished by these different types +of events. The state embedding operators have two components. The first, which +is the first one or two characters, specifies the class of states that the +action will be embedded into. 
The second component specifies the type of event +the action will be executed on. + +\def\fakeitem{\hspace*{12pt}$\bullet$\hspace*{10pt}} + +\begin{minipage}{\textwidth} +\begin{multicols}{2} +\raggedcolumns +\noindent The different classes of states are:\\ +\fakeitem \verb|> | -- the start state \\ +\fakeitem \verb|$ | -- all states\\ +\fakeitem \verb|% | -- final states\\ +\fakeitem \verb|< | -- any state except the start state\\ +\fakeitem \verb|@ | -- any state except final states\\ +\fakeitem \verb|<>| -- any except start and final (middle) + +\columnbreak + +\noindent The different kinds of embeddings are:\\ +\fakeitem \verb|~| -- to-state actions\\ +\fakeitem \verb|*| -- from-state actions\\ +\fakeitem \verb|/| -- EOF actions\\ +\fakeitem \verb|!| -- error actions\\ +\fakeitem \verb|^| -- local error actions\\ +\end{multicols} +\end{minipage} +%\label{state-act-embed} +%\caption{The two components of state embedding operators. The class of states +%to select comes first, followed by the type of embedding.} +% +%\begin{figure}[t] +%\centering +%\includegraphics{stembed} +%\caption{Summary of state manipulation operators} +%\label{state-act-embed-chart} +%\end{figure} + +%\noindent Putting these two components together we get a matrix of state +%embedding operators. The entire set is given in Figure \ref{state-act-embed-chart}. + + +\subsection{To-State and From-State Actions} + +\subsubsection{To-State Actions} + +\verb| >~ $~ %~ <~ @~ <>~ | +\verbspace + +To-state actions are executed whenever the state machine moves into the +specified state, either by a natural movement over a transition or by an +action-based transfer of control such as \verb|fgoto|. They are executed after the +in-transition's actions but before the current character is advanced and +tested against the end of the input block. To-state embeddings stay with the +state. 
They are irrespective of the state's current set of transitions and any +future transitions that may be added in or out of the state. + +Note that the setting of the current state variable \verb|cs| outside of the +execute code is not considered by Ragel as moving into a state and consequently +the to-state actions of the new current state are not executed. This includes +the initialization of the current state when the machine begins. This is +because the entry point into the machine execution code is after the execution +of to-state actions. + +\subsubsection{From-State Actions} + +\verb| >* $* %* <* @* <>* | +\verbspace + +From-state actions are executed whenever the state machine takes a transition from a +state, either to itself or to some other state. These actions are executed +immediately after the current character is tested against the input block end +marker and before the transition to take is sought based on the current +character. From-state actions are therefore executed even if a transition +cannot be found and the machine moves into the error state. Like to-state +embeddings, from-state embeddings stay with the state. + +\subsection{EOF Actions} + +\verb| >/ $/ %/ </ @/ <>/ | +\verbspace + +The EOF action embedding operators enable the user to embed EOF actions into +different classes of +states. EOF actions are stored in states and generated with the \verb|write eof| +statement. The generated EOF code switches on the current state and executes the EOF +actions associated with it. + +\subsection{Handling Errors} + +\subsubsection{Global Error Actions} + +\verb| >! $! %! <! @! <>! | +\verbspace + +Error actions are stored in states until the final state machine has been fully +constructed. They are then transferred to the transitions that move into the +error state. This transfer entails the creation of a transition from the state +to the error state that is taken on all input characters which are not already +covered by the state's transitions. 
In other words it provides a default +action. Error actions can induce a recovery by altering \verb|p| and then jumping back +into the machine with \verb|fgoto|. + +\subsubsection{Local Error Actions} + +\verb| >^ $^ %^ <^ @^ <>^ | +\verbspace + +Like global error actions, local error actions are also stored in states until +a transfer point. The transfer point is different, however. Each local error action +embedding is associated with a name. When a machine definition has been fully +constructed, all local error action embeddings associated with the same name as the +machine are transferred to error transitions. Local error actions can be used +to specify an action to take when a particular section of a larger state +machine fails to make a match. A particular machine definition's ``thread'' may +die and the local error actions executed, however the machine as a whole may +continue to match input. + +There are two forms of local error action embeddings. In the first form the name defaults +to the current machine. In the second form the machine name can be specified. This +is useful when it is more convenient to specify the local error action in a +sub-definition that is used to construct the machine definition where the +transfer should happen. To embed local error actions and explicitly state the +machine on which the transfer is to happen use \verb|(name, action)| as the +action. + +\begin{comment} +\begin{itemize} +\setlength{\parskip}{0in} +\item \verb|expr >^ (name, action) | -- Start state. +\item \verb|expr $^ (name, action) | -- All states. +\item \verb|expr %^ (name, action) | -- Final states. +\item \verb|expr <^ (name, action) | -- Not start state. +\item \verb|expr <>^ (name, action)| -- Not start and not final states.
+\end{itemize} +\end{comment} + +\section{Action Ordering and Duplicates} + +When building a parser by combining smaller expressions which themselves have +embedded actions, it is often the case that transitions are made which need to +execute a number of actions on one input character. For example when we leave +an expression, we may execute the expression's pending out action and the +subsequent expression's starting action on the same input character. We must +therefore devise a method for ordering actions that is both intuitive and +predictable for the user and repeatable by the state machine compiler. The +determinization process cannot simply order actions by the time at which they +are introduced into a transition -- otherwise the programmer will be at the +mercy of luck. + +We associate with the embedding of each action a distinct timestamp which is +used to order actions that appear together on a single transition in the final +compiled state machine. To accomplish this we traverse the parse tree of +regular expressions and assign timestamps to action embeddings. This algorithm +is recursive in nature and quite simple. When it visits a parse tree node it +assigns timestamps to all {\em starting} action embeddings, recurses on the +parse tree, then assigns timestamps to the remaining {\em all}, {\em +finishing}, and {\em leaving} embeddings in the order in which they appear. + +Ragel does not permit actions (defined or unnamed) to appear multiple times in +an action list. When the final machine has been created, actions which appear +more than once in a single transition or EOF action list have their duplicates +removed. The first appearance of the action is preserved. This is useful in a +number of scenarios. First, it allows us to union machines with common +prefixes without worrying about the action embeddings in the prefix being +duplicated.
Second, it prevents pending out actions from being transferred multiple times +when a concatenation follows a kleene star and the two machines begin with a common +character. + +\verbspace +\begin{verbatim} +word = [a-z]+ %act; +main := word ( '\n' word )* '\n\n'; +\end{verbatim} + +\section{Values and Statements Available in Code Blocks} +\label{vals} + +\noindent The following values are available in code blocks: + +\begin{itemize} +\item \verb|fpc| -- A pointer to the current character. This is equivalent to +accessing the \verb|p| variable. + +\item \verb|fc| -- The current character. This is equivalent to the expression \verb|(*p)|. + +\item \verb|fcurs| -- An integer value representing the current state. This +value should only be read from. To move to a different place in the machine +from action code use the \verb|fgoto|, \verb|fnext| or \verb|fcall| statements. +Outside of the machine execution code the \verb|cs| variable may be modified. + +\item \verb|ftargs| -- An integer value representing the target state. This +value should only be read from. Again, \verb|fgoto|, \verb|fnext| and +\verb|fcall| can be used to move to a specific entry point. + +\item \verb|fentry(<label>)| -- Retrieve an integer value representing the +entry point \verb|label|. The integer value returned will be a compile time +constant. This number is suitable for later use in control flow transfer +statements that take an expression. This value should not be compared against +the current state because any given label can have multiple states representing +it. The value returned by \verb|fentry| will be one of the possibly multiple states the +label represents. +\end{itemize} + +\noindent The following statements are available in code blocks: + +\begin{itemize} + +\item \verb|fhold;| -- Do not advance over the current character. If processing +data in multiple buffer blocks, the \verb|fhold| statement should only be used +once in the set of actions executed on a character. 
Multiple calls may result +in backing up over the beginning of the buffer block. The \verb|fhold| +statement does not imply any transfer of control. In actions embedded into +transitions, it is equivalent to the \verb|p--;| statement. In scanner pattern +actions any changes made to \verb|p| are lost. In this context, \verb|fhold| is +equivalent to \verb|tokend--;|. + +\item \verb|fexec <expr>;| -- Set the next character to process. This can be +used to backtrack to previous input or advance ahead. +Unlike \verb|fhold|, which can be used +anywhere, \verb|fexec| requires the user to ensure that the target of the +backtrack is in the current buffer block or is known to be somewhere ahead of +it. The machine will continue iterating forward until \verb|pe| is reached, +\verb|fbreak| is called or the machine moves into the error state. In actions +embedded into transitions, the \verb|fexec| statement is equivalent to setting +\verb|p| to one position ahead of the next character to process. If the user +also modifies \verb|pe|, it is possible to change the buffer block entirely. +In scanner pattern actions any changes made to \verb|p| are lost. In this +context, \verb|fexec| is equivalent to setting \verb|tokend| to the next +character to process. + +\item \verb|fgoto <label>;| -- Jump to an entry point defined by +\verb|<label>|. The \verb|fgoto| statement immediately transfers control to +the destination state. + +\item \verb|fgoto *<expr>;| -- Jump to an entry point given by \verb|<expr>|. +The expression must evaluate to an integer value representing a state. + +\item \verb|fnext <label>;| -- Set the next state to be the entry point defined +by \verb|<label>|. The \verb|fnext| statement does not immediately jump to the +specified state. Any action code following the statement is executed. + +\item \verb|fnext *<expr>;| -- Set the next state to be the entry point given +by \verb|<expr>|. The expression must evaluate to an integer value representing +a state.
+ +\item \verb|fcall <label>;| -- Push the target state and jump to the entry +point defined by \verb|<label>|. The next \verb|fret| will jump to the target +of the transition on which the call was made. Use of \verb|fcall| requires +the declaration of a call stack. An array of integers named \verb|stack| and a +single integer named \verb|top| must be declared. With the \verb|fcall| +construct, control is immediately transferred to the destination state. + +\item \verb|fcall *<expr>;| -- Push the current state and jump to the entry +point given by \verb|<expr>|. The expression must evaluate to an integer value +representing a state. + +\item \verb|fret;| -- Return to the target state of the transition on which the +last \verb|fcall| was made. Use of \verb|fret| requires the declaration of a +call stack with \verb|fstack| in the struct block. Control is immediately +transferred to the destination state. + +\item \verb|fbreak;| -- Save the current state and immediately break out of the +execute loop. This statement is useful in conjunction with the \verb|noend| +write option. Rather than process input until the end marker of the input +buffer is arrived at, the fbreak statement can be used to stop processing input +upon seeing some end-of-string marker. It can also be used for handling +exceptional circumstances. The fbreak statement does not change the pointer to +the current character. After an \verb|fbreak| call the \verb|p| variable will point to +the character that was being traversed over when the action was +executed. The current state will be the target of the current transition. + +\end{itemize} + +\noindent {\bf Note:} Once actions with control-flow commands are embedded into a +machine, the user must exercise caution when using the machine as the operand +to other machine construction operators. 
If an action jumps to another state +then unioning any transition that executes that action with another transition +that follows some other path will cause that other path to be lost. Using +commands that manually jump around a machine takes us out of the domain of +regular languages because transitions that may be conditional and that the +machine construction operators are not aware of are introduced. These +commands should therefore be used with caution. + + +\chapter{Controlling Nondeterminism} +\label{controlling-nondeterminism} + +Along with the flexibility of arbitrary action embeddings comes a need to +control nondeterminism in regular expressions. If a regular expression is +ambiguous, then sub-components of a parser other than the intended parts may be +active at any given time. This means that actions which are irrelevant to the +current subset of the parser may be executed, causing problems for the +programmer. + +Tools which are based on regular expression engines and which are used for +recognition tasks will usually function as intended regardless of the presence +of ambiguities. It is quite common for users of scripting languages to write +regular expressions that are heavily ambiguous and it generally does not +matter. As long as one of the potential matches is recognized, there can be any +number of other matches present. + +In some systems, matched text is attributed to a portion of a regular +expression. Armed with the knowledge that the regular expression engine always +pursues the longest match or the shortest match, the user is able to compose +their patterns accordingly. + +In Ragel, there is no regular expression run-time engine, just a simple state +machine execution model. When we begin to embed actions and face the +possibility of spurious action execution, it becomes clear that controlling +nondeterminism at the machine construction level is very important. Consider +the following example.
+ +\verbspace +\begin{verbatim} +lines = ( word ( space word )* '\n' )*; +\end{verbatim} +\verbspace + +Since the \verb|space| built-in expression includes the newline character, we will +not leave the line expression when a newline character is seen. We will +simultaneously pursue the possibility of matching further words on the same +line and the possibility of matching a second line. The solution here is easy: +simply exclude the newline character from the \verb|space| expression. Solving +this kind of problem is straightforward because the string that terminates the +sequence is a single character long. When it is multiple characters long we +have a more difficult problem, as shown by the following example. + +\verbspace +\begin{verbatim} +comment = '/*' any* '*/'; +\end{verbatim} +\verbspace + +Using standard concatenation, we will never leave the \verb|any*| expression. +We will forever entertain the possibility that a \verb|'*/'| string that we see +is contained in a longer comment and that, simultaneously, the comment has +ended. One way to approach the problem is to exclude the terminating string +from the \verb|any*| expression using set difference. We must be careful to +exclude not just the terminating string, but any string that contains it as a +substring. A verbose, but proper specification of a C comment parser is given +by the following regular expression. Note that this operation is the basis of the +strong subtraction operator. + +\verbspace +\begin{verbatim} +comment = '/*' ( any* - ( any* '*/' any* ) ) '*/'; +\end{verbatim} +\verbspace + +We can also phrase the problem in terms of the transitions of the state +machines that implement these expressions. During the concatenation of +\verb|any*| and \verb|'*/'| we will be making transitions that are composed of +both the loop of the first expression and the characters of the second. 
+At this time we want the transition on the \verb|'/'| character to take precedence +over and disallow the transition that originated in the \verb|any*| loop. + +In another scenario, we wish to implement a lightweight tokenizer that we can +utilize in the composition of a larger machine. For example, some HTTP headers +have a token stream as a sub-language. + +\verbspace +\begin{verbatim} +header_contents = ( lower+ | digit+ | ' ' )*; +\end{verbatim} +\verbspace + +In this case, the problem with using a standard kleene star operation is that +there is an ambiguity between extending a token and wrapping around the +machine to begin a new token. Using the standard operator, we get +an undesirable nondeterministic behaviour. What is required is for the +transitions that represent an extension of a token to take precedence over the +transitions that represent the beginning of a new token. For this problem, +there is no simple solution that uses standard regular expressions. + +\section{Priorities} + +A priority mechanism was devised and built into the determinization +process, specifically for the purpose of allowing the user to control +nondeterminism. Priorities are integer values embedded into transitions. When +the determinization process is combining transitions that have different +priorities, the transition with the higher priority is preserved and the +transition with the lower priority is dropped. + +Unfortunately, priorities can have unintended side effects because their +operation requires that they linger in transitions indefinitely. They must linger +because the Ragel program cannot know when the user is finished with a priority +embedding. A solution whereby they are explicitly deleted after use is +conceivable; however this is not very user-friendly. Priorities were therefore +made into named entities. Only priorities with the same name are allowed to +interact. 
This allows any number of priorities to coexist in one machine for +the purpose of controlling various different regular expression operations and +eliminates the need to ever delete them. Such a scheme allows the user to +choose a unique name, embed two different priority values using that name +and be confident that the priority embedding will be free of any side effects. + +\section{Priority Assignment} + +Priorities are integer values assigned to names within transitions. +Only priorities with the same name are allowed to interact. When the machine +construction process is combining transitions that have different priorities +assigned to the same name, the transition with the higher priority is preserved +and the transition with the lower priority is dropped. + +In the first form of priority embedding the name defaults to the name of the machine +definition that the priority is assigned in. In this sense priorities are by +default local to the current machine definition or instantiation. Beware of +using this form in a longest-match machine, since there is only one name for +the entire set of longest match patterns. In the second form the priority's +name can be specified, allowing priority interaction across machine definition +boundaries. + +\begin{itemize} +\setlength{\parskip}{0in} +\item \verb|expr > int| -- Sets starting transitions to have priority int. +\item \verb|expr @ int| -- Sets transitions that go into a final state to have priority int. +\item \verb|expr $ int| -- Sets all transitions to have priority int. +\item \verb|expr % int| -- Sets pending out transitions from final states to +have priority int.\\ When a transition is made going out of the machine (either +by concatenation or kleene star) its priority is immediately set to the pending +out priority. +\end{itemize} + +The second form of priority assignment allows the programmer to specify the name +to which the priority is assigned.
+ +\begin{itemize} +\setlength{\parskip}{0in} +\item \verb|expr > (name, int)| -- Entering transitions. +\item \verb|expr @ (name, int)| -- Transitions into final state. +\item \verb|expr $ (name, int)| -- All transitions. +\item \verb|expr % (name, int)| -- Pending out transitions. +\end{itemize} + +\section{Guarded Operators that Encapsulate Priorities} + +Priorities can be very confusing for the user. They force the user to imagine +the transitions inside machines and work out the precise effects of regular +expression operations. When we consider that this problem is worsened by the +potential for side effects caused by unintended priority name collisions, we +see that exposing the user to priorities is rather undesirable. + +Fortunately, in practice the use of priorities has been necessary only in a +small number of scenarios. This allows us to encapsulate their functionality +into a small set of operators and fully hide them from the user. This is +advantageous from a language design point of view because it greatly simplifies +the design. + +\begin{comment} +Example from 2 page poster paper. +% GENERATE: lmkleene +% %%{ +% machine lmkleene; +% action id {} +% action number {} +% action ws {} +% action mark {} +\begin{verbatim} +main := ( lower+ ':' ' '* <: ( + ( lower ( lower | digit )* ) >mark %id | + digit+ >mark %number | + ' '+ >mark %ws +)** '\n' )*; +\end{verbatim} +% }%% +% END GENERATE + +% FIXME: Add +%\begin{center} +%\includegraphics[scale=0.4]{lmkleene.ps} +%\end{center} +\end{comment} + +\subsection{Entry-Guarded Concatenation} + +\verb|expr :> expr| +\verbspace + +This operator concatenates two machines, but first assigns a low +priority to all transitions +of the first machine and a high priority to the entering transitions of the +second machine. This operator is useful if from the final states of the first +machine, it is possible to accept the characters in the start transitions of +the second machine.
This operator effectively terminates the first machine +immediately upon entering the second machine, where otherwise they would be +pursued concurrently. In the following example, entry-guarded concatenation is +used to move out of a machine that matches everything at the first sign of an +end-of-input marker. + +\verbspace +\begin{verbatim} +# Leave the catch-all machine on the first character of FIN. +main := any* :> 'FIN'; +\end{verbatim} +\verbspace + +\begin{center} +\includegraphics{exstpri} +\end{center} +\graphspace + +Entry-guarded concatenation is equivalent to the following: + +\verbspace +\begin{verbatim} +expr $(unique_name,0) . expr >(unique_name,1) +\end{verbatim} + +\subsection{Finish-Guarded Concatenation} + +\verb|expr :>> expr| +\verbspace + +This operator is +like the previous operator, except the higher priority is placed on the final +transitions of the second machine. This is useful if one wishes to entertain +the possibility of continuing to match the first machine right up until the +second machine enters a final state. In other words it terminates the first +machine only when the second accepts. In the following example, finish-guarded +concatenation causes the move out of the machine that matches everything to be +delayed until the full end-of-input marker has been matched. + +\verbspace +\begin{verbatim} +# Leave the catch-all machine on the last character of FIN. +main := any* :>> 'FIN'; +\end{verbatim} +\verbspace + +\begin{center} +\includegraphics{exdonepri} +\end{center} +\graphspace + +Finish-guarded concatenation is equivalent to the following: + +\verbspace +\begin{verbatim} +expr $(unique_name,0) . expr @(unique_name,1) +\end{verbatim} + +\subsection{Left-Guarded Concatenation} + +\verb|expr <: expr| +\verbspace + +This operator places +a higher priority on the left expression. It is useful if you want to prefix a +sequence with another sequence composed of some of the same characters.
For +example, one can consume leading whitespace before tokenizing a sequence of +whitespace-separated words as in: + +\verbspace +\begin{verbatim} +( ' '* <: ( ' '+ | [a-z]+ )** ) +\end{verbatim} +\verbspace + +Left-guarded concatenation is equivalent to the following: + +\verbspace +\begin{verbatim} +expr $(unique_name,1) . expr >(unique_name,0) +\end{verbatim} +\verbspace + +\subsection{Longest-Match Kleene Star} +\label{longest_match_kleene_star} + +\verb|expr**| +\verbspace + +This version of kleene star puts a higher priority on staying in the +machine versus wrapping around and starting over. The LM kleene star is useful +when writing simple tokenizers. These machines are built by applying the +longest-match kleene star to an alternation of token patterns, as in the +following. + +\verbspace +\begin{verbatim} +# Repeat tokens, but make sure to get the longest match. +main := ( + lower ( lower | digit )* %A | + digit+ %B | + ' ' +)**; +\end{verbatim} + +\verbspace + +\begin{center} +\includegraphics{exfinpri} +\end{center} +\graphspace + +If a regular kleene star were used the machine above would not be able to +distinguish between extending a word and beginning a new one. This operator is +equivalent to: + +\verbspace +\begin{verbatim} +( expr $(unique_name,1) %(unique_name,0) )* +\end{verbatim} +\verbspace + +When the kleene star is applied, transitions are made out of the machine which +go back into it. These are assigned a priority of zero by the pending out +transition mechanism. This is less than the priority of the transitions out of +the final states that do not leave the machine. When two transitions clash on +the same character, the differing priorities cause the transition which +stays in the machine to take precedence. The transition that wraps around is +dropped. + +Note that this operator does not build a scanner in the traditional sense because +there is never any backtracking.
To build a scanner in the traditional sense +use the Longest-Match machine construction described in Section \ref{generating-scanners}. + +\chapter{Interface to Host Program} + +\section{Alphtype Statement} + +\begin{verbatim} +alphtype unsigned int; +\end{verbatim} +\verbspace + +The alphtype statement specifies the alphabet data type that the machine +operates on. During the compilation of the machine, integer literals are expected to +be in the range of possible values of the alphtype. Supported alphabet types +are \verb|char|, \verb|unsigned char|, \verb|short|, \verb|unsigned short|, +\verb|int|, \verb|unsigned int|, \verb|long|, and \verb|unsigned long|. +The default is \verb|char|. + +\section{Getkey Statement} + +\begin{verbatim} +getkey fpc->id; +\end{verbatim} +\verbspace + +Specify to Ragel how to retrieve the character that the machine operates on +from the pointer to the current element (\verb|p|). Any expression that returns +a value of the alphabet type +may be used. The getkey statement may be used for looking into element +structures or for translating the character to process. The getkey expression +defaults to \verb|(*p)|. In goto-driven machines the getkey expression may be +evaluated more than once per element processed, therefore it should not incur a +large cost or preclude optimization. + +\section{Access Statement} + +\begin{verbatim} +access fsm->; +\end{verbatim} +\verbspace + +The access statement allows one to tell Ragel how the generated code should +access the machine data that is persistent across processing buffer blocks. +This includes all variables except \verb|p| and \verb|pe|. This includes +\verb|cs|, \verb|top|, \verb|stack|, \verb|tokstart|, \verb|tokend| and \verb|act|. +This is useful if a machine is to be encapsulated inside a +structure in C code. The access statement can be used to give the name of +a pointer to the structure.
+ +\section{Maintaining Pointers to Input Data} + +In the creation of any parser it is not uncommon to require the collection of +the data being parsed. It is always possible to collect data into a growable +buffer as the machine moves over it, however the copying of data is a somewhat +wasteful use of processor cycles. The most efficient way to collect data +from the parser is to set pointers into the input. This poses a problem for +uses of Ragel where the input data arrives in blocks, such as over a socket or +from a file. The program will error if a pointer is set in one buffer block but +must be used while parsing a following buffer block. + +The longest-match constructions exhibit this problem, requiring the maintenance +code described in Section \ref{generating-scanners}. If a longest-match +construction has been used somewhere in the machine then it is possible to +take advantage of the required prefix maintenance code in the driver program to +ensure pointers to the input are always valid. If laying down a pointer one can +set \verb|tokstart| at the same spot or ahead of it. When data is shifted in +between loops the user must also shift the pointer. In this way it is possible +to maintain pointers to the input that will always be consistent. + +\begin{figure} +\small +\begin{verbatim} + int have = 0; + while ( 1 ) { + char *p, *pe, *data = buf + have; + int len, space = BUFSIZE - have; + + if ( space == 0 ) { + fprintf(stderr, "BUFFER OUT OF SPACE\n"); + exit(1); + } + + len = fread( data, 1, space, stdin ); + if ( len == 0 ) + break; + + /* Find the last newline by searching backwards. */ + p = buf; + pe = data + len - 1; + while ( *pe != '\n' && pe >= buf ) + pe--; + pe += 1; + + %% write exec; + + /* How much is still in the buffer? 
*/ + have = data + len - pe; + if ( have > 0 ) + memmove( buf, pe, have ); + + if ( len < space ) + break; + } +\end{verbatim} +\caption{An example of line-oriented processing.} +\label{line-oriented} +\end{figure} + +In general, there are two approaches for guaranteeing the consistency of +pointers to input data. The first approach is the one just described; +lay down a marker from an action, +then later ensure that the data the marker points to is preserved ahead of +the buffer on the next execute invocation. This approach is good because it +allows the parser to decide on the pointer-use boundaries, which can be +arbitrarily complex parsing conditions. A downside is that it requires any +pointers that are set to be corrected in between execute invocations. + +The alternative is to find the pointer-use boundaries before invoking the execute +routine, then pass in the data using these boundaries. For example, if the +program must perform line-oriented processing, the user can scan backwards from +the end of an input block that has just been read in and process only up to the +first found newline. On the next input read, the new data is placed after the +partially read line and processing continues from the beginning of the line. +An example of line-oriented processing is given in Figure \ref{line-oriented}. + + +\section{Running the Executables} + +Ragel is broken down into two executables: a frontend which compiles machines +and emits them in an XML format, and a backend which generates code or a +Graphviz Dot file from the XML data. The purpose of the XML-based intermediate +format is to allow users to inspect their compiled state machines and to +interface Ragel to other tools such as custom visualizers, code generators or +analysis tools. The intermediate format will provide a better platform for +extending Ragel to support new host languages. 
The split also serves to reduce +complexity of the Ragel program by strictly separating the data structures and +algorithms that are used to compile machines from those that are used to +generate code. + +\verbspace +\begin{verbatim} +[user@host] myproj: ragel file.rl | rlcodegen -G2 -o file.c +\end{verbatim} + +\section{Choosing a Generated Code Style} +\label{genout} + +There are three styles of code output to choose from. Code style affects the +size and speed of the compiled binary. Changing code style does not require any +change to the Ragel program. There are two table-driven formats and a goto +driven format. + +In addition to choosing a style to emit, there are various levels of action +code reuse to choose from. The maximum reuse levels (\verb|-T0|, \verb|-F0| +and \verb|-G0|) ensure that no FSM action code is ever duplicated by encoding +each transition's action list as static data and iterating +through the lists on every transition. This will normally result in a smaller +binary. The less action reuse options (\verb|-T1|, \verb|-F1| and \verb|-G1|) +will usually produce faster running code by expanding each transition's action +list into a single block of code, eliminating the need to iterate through the +lists. This duplicates action code instead of generating the logic necessary +for reuse. Consequently the binary will be larger. However, this tradeoff applies to +machines with moderate to dense action lists only. If a machine's transitions +frequently have less than two actions then the less reuse options will actually +produce both a smaller and a faster running binary due to less action sharing +overhead. The best way to choose the appropriate code style for your +application is to perform your own tests. + +The table-driven FSM represents the state machine as constant static data. There are +tables of states, transitions, indices and actions. The current state is +stored in a variable. 
The execution is simply a loop that looks up the current +state, looks up the transition to take, executes any actions and moves to the +target state. In general, the table-driven FSM can handle any machine, produces +a smaller binary and requires a less expensive host language compile, but +results in slower running code. Since the table-driven format is the most +flexible it is the default code style. + +The flat table-driven machine is a table-based machine that is optimized for +small alphabets. Where the regular table machine uses the current character as +the key in a binary search for the transition to take, the flat table machine +uses the current character as an index into an array of transitions. This is +faster in general, however is only suitable if the span of possible characters +is small. + +The goto-driven FSM represents the state machine using goto and switch +statements. The execution is a flat code block where the transition to take is +computed using switch statements and directly executable binary searches. In +general, the goto FSM produces faster code but results in a larger binary and a +more expensive host language compile. + +The goto-driven format has an additional action reuse level (\verb|-G2|) that +writes actions directly into the state transitioning logic rather than putting +all the actions together into a single switch. Generally this produces faster +running code because it allows the machine to encode the current state using +the processor's instruction pointer. Again, sparse machines may actually +compile to smaller binaries when \verb|-G2| is used due to less state and +action management overhead. For many parsing applications \verb|-G2| is the +preferred output format. 
+ +\verbspace +\begin{center} +\begin{tabular}{|c|c|} +\hline +\multicolumn{2}{|c|}{\bf Code Output Style Options} \\ +\hline +\verb|-T0|&binary search table-driven\\ +\hline +\verb|-T1|&binary search, expanded actions\\ +\hline +\verb|-F0|&flat table-driven\\ +\hline +\verb|-F1|&flat table, expanded actions\\ +\hline +\verb|-G0|&goto-driven\\ +\hline +\verb|-G1|&goto, expanded actions\\ +\hline +\verb|-G2|&goto, in-place actions\\ +\hline +\end{tabular} +\end{center} + +\section{Graphviz} + +Ragel is able to emit compiled state machines in Graphviz's Dot file format. +Graphviz support allows users to perform +incremental visualization of their parsers. User actions are displayed on +transition labels of the graph. If the final graph is too large to be +meaningful, or even drawn, the user is able to inspect portions of the parser +by naming particular regular expression definitions with the \verb|-S| and +\verb|-M| options to the \verb|ragel| program. Use of Graphviz greatly +improves the Ragel programming experience. It allows users to learn Ragel by +experimentation and also to track down bugs caused by unintended +nondeterminism. + +\end{document} diff --git a/doc/ragel.1.in b/doc/ragel.1.in new file mode 100644 index 0000000..cdae3e9 --- /dev/null +++ b/doc/ragel.1.in @@ -0,0 +1,561 @@ +.\" +.\" Copyright 2001-2005 Adrian Thurston <thurston@cs.queensu.ca> +.\" + +.\" This file is part of Ragel. +.\" +.\" Ragel is free software; you can redistribute it and/or modify +.\" it under the terms of the GNU General Public License as published by +.\" the Free Software Foundation; either version 2 of the License, or +.\" (at your option) any later version. +.\" +.\" Ragel is distributed in the hope that it will be useful, +.\" but WITHOUT ANY WARRANTY; without even the implied warranty of +.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +.\" GNU General Public License for more details. 
+.\" +.\" You should have received a copy of the GNU General Public License +.\" along with Ragel; if not, write to the Free Software +.\" Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +.\" Process this file with +.\" groff -man -Tascii ragel.1 +.\" +.TH RAGEL 1 "@PUBDATE@" "Ragel @VERSION@" "Ragel State Machine Compiler" +.SH NAME +ragel \- compile regular languages into executable state machines +.SH SYNOPSIS +.B ragel +.RI [ options ] +.I file +.SH DESCRIPTION +.B Note: +this is the frontend component of Ragel, which generates an intermediate +file format that must be processed by rlcodegen(1). + +Ragel compiles finite state machines from regular languages into executable +code. Ragel can generate C, C++, Objective-C, D, or Java code. Ragel state +machines can not only recognize byte +sequences as regular expression machines do, but can also execute code at +arbitrary points in the recognition of a regular language. User code is +embedded using inline operators that do not disrupt the regular language +syntax. + +The core language consists of standard regular expression operators, such as +union, concatenation and kleene star, accompanied by action embedding +operators. Ragel also provides operators that let you control any +non-determinism that you create, construct scanners using the longest match +paradigm, and build state machines using the statechart model. It is also +possible to influence the execution of a state machine from inside an embedded +action by jumping or calling to other parts of the machine and reprocessing +input. + +Ragel provides a very flexible interface to the host language that attempts to +place minimal restrictions on how the generated code is used and integrated +into the application. The generated code has no dependencies. + +.SH OPTIONS +.TP +.BR \-h ", " \-H ", " \-? ", " \-\-help +Display help and exit. +.TP +.B \-o " file" +Write output to file.
If -o is not given, a default file name is chosen by +replacing the suffix of the input. For source files ending in .rh the suffix .h +is used. For all other source files a suffix based on the output language +is used (.c, .cpp, .m, .dot) +.TP +.B \-n +Do not perform state minimization. +.TP +.B \-m +Perform minimization once, at the end of the state machine compilation. +.TP +.B \-l +Minimize after nearly every operation. Lists of like operations such as unions +are minimized once at the end. This is the default minimization option. +.TP +.B \-e +Minimize after every operation. +.TP +.B \-S <spec> +FSM specification to output for -V +.TP +.B \-M <machine> +Machine definition/instantiation to output for -V +.TP +.B \-C +The host language is C, C++, Obj-C or Obj-C++. This is the default host language option. +.TP +.B \-D +The host language is D. +.TP +.B \-J +The host language is Java. +.SH RAGEL INPUT +NOTE: This is a very brief description of Ragel input. Ragel is described in +more detail in the user guide available from the homepage (see below). + +Ragel normally passes input files straight to the output. When it sees an FSM +specification that contains machine instantiations it stops to generate the +state machine. If there are write statements (such as "write exec") then ragel emits the +corresponding code. There can be any number of FSM specifications in an input +file. A multi-line FSM specification starts with '%%{' and ends with '}%%'. A +single line FSM specification starts with %% and ends at the first newline. +.SH FSM STATEMENTS +.TP +.I Machine Name: +Set the name of the machine. If given, it must be the first statement. +.TP +.I Alphabet Type: +Set the data type of the alphabet. +.TP +.I GetKey: +Specify how to retrieve the alphabet character from the element type. +.TP +.I Include: +Include a machine of the same name as the current or of a different name in either +the current file or some other file.
+.TP +.I Action Definition: +Define an action that can be invoked by the FSM. +.TP +.I Fsm Definition, Instantiation and Longest Match Instantiation: +Used to build FSMs. Syntax description in next few sections. +.TP +.I Access: +Specify how to access the persistent state machine variables. +.TP +.I Write: +Write some component of the machine. +.SH BASIC MACHINES +The basic machines are the base operands of the regular language expressions. +.TP +.I 'hello' +Concat literal. Produces a concatenation of the characters in the string. +Supports escape sequences with '\\'. The result will have a start state and a +transition to a new state for each character in the string. The last state in +the sequence will be made final. To make the string case-insensitive, append +an 'i' to the string, as in 'cmd'i\fR. +.TP +.I \(dqhello\(dq +Identical to single quote version. +.TP +.I [hello] +Or literal. Produces a union of characters. Supports character ranges +with '\-', negating the sense of the union with an initial '^' and escape +sequences with '\\'. The result will have two states with a transition between +them for each character or range. +.LP +NOTE: '', "", and [] produce null FSMs. Null machines have one state that is +both a start state and a final state and match the zero length string. A null machine +may be created with the null builtin machine. +.TP +.I integer +Makes a two state machine with one transition on the given integer number. +.TP +.I hex +Makes a two state machine with one transition on the given hexadecimal number. +.TP +.I "/simple_regex/" +A simple regular expression. Supports the notation '.', '*' and '[]', character +ranges with '\-', negating the sense of an OR expression with an initial '^' +and escape sequences with '\\'. Also supports one trailing flag: i. Use it to +produce a case-insensitive regular expression, as in /GET/i. +.TP +.I lit .. lit +Specifies a range.
The allowable upper and lower bounds are concat literals of +length one and number machines. +For example, 0x10..0x20, 0..63, and 'a'..'z' are valid ranges. +.TP +.I "variable_name" +References the machine definition assigned to the variable name given. +.TP +.I "builtin_machine" +There are several builtin machines available. They are all two state machines +for the purpose of matching common classes of characters. They are: +.RS +.TP +.B any +Any character in the alphabet. +.TP +.B ascii +Ascii characters 0..127. +.TP +.B extend +Ascii extended characters. This is the range -128..127 for signed alphabets +and the range 0..255 for unsigned alphabets. +.TP +.B alpha +Alphabetic characters /[A-Za-z]/. +.TP +.B digit +Digits /[0-9]/. +.TP +.B alnum +Alpha numerics /[0-9A-Za-z]/. +.TP +.B lower +Lowercase characters /[a-z]/. +.TP +.B upper +Uppercase characters /[A-Z]/. +.TP +.B xdigit +Hexadecimal digits /[0-9A-Fa-f]/. +.TP +.B cntrl +Control characters 0..31. +.TP +.B graph +Graphical characters /[!-~]/. +.TP +.B print +Printable characters /[ -~]/. +.TP +.B punct +Punctuation. Graphical characters that are not alpha-numerics +/[!-/:-@\\[-`{-~]/. +.TP +.B space +Whitespace /[\\t\\v\\f\\n\\r ]/. +.TP +.B null +Zero length string. Equivalent to '', "" and []. +.TP +.B empty +Empty set. Matches nothing. +.RE +.SH BRIEF OPERATOR REFERENCE +Operators are grouped by precedence, group 1 being the lowest and group 9 the +highest. +.LP +.B GROUP 1: +.TP +.I expr , expr +Join machines together without drawing any transitions, setting up a start +state or any final states. Start state must be explicitly specified with the +"start" label. Final states may be specified with epsilon transitions to +the implicitly created "final" state. +.LP +.B GROUP 2: +.TP +.I expr | expr +Produces a machine that matches any string in machine one or machine two. +.TP +.I expr & expr +Produces a machine that matches any string that is in both machine one and +machine two.
+.TP +.I expr - expr +Produces a machine that matches any string that is in machine one but not in +machine two. +.LP +.B GROUP 3: +.TP +.I expr . expr +Produces a machine that matches all the strings in machine one followed +by all the strings in machine two. +.LP +NOTE: Concatenation is the default operator. Two machines next to each other +with no operator between them result in the concatenation operation. +.LP +.B GROUP 4: +.TP +.I label: expr +Attaches a label to an expression. Labels can be used by epsilon transitions +and fgoto and fcall statements in actions. Also note that the referencing of a +machine definition causes the implicit creation of a label by the same name. +.LP +.B GROUP 5: +.TP +.I expr -> label +Draws an epsilon transition to the state defined by label. Label must +be a name in the current scope. Epsilon transitions are resolved when +comma operators are evaluated and at the root of the expression tree of +machine assignment/instantiation. +.LP +.B GROUP 6: Actions +.LP +An action may be a name predefined with an action statement or may +be specified directly with '{' and '}' in the expression. +.TP +.I expr > action +Embeds action into starting transitions. +.TP +.I expr @ action +Embeds action into transitions that go into a final state. +.TP +.I expr $ action +Embeds action into all transitions. Does not include pending out transitions. +.TP +.I expr % action +Embeds action into pending out transitions from final states. +.LP +.B GROUP 6: EOF Actions +.LP +When a machine's finish routine is called the current state's EOF actions are +executed. +.TP +.I expr >/ action +Embed an EOF action into the start state. +.TP +.I expr </ action +Embed an EOF action into all states except the start state. +.TP +.I expr $/ action +Embed an EOF action into all states. +.TP +.I expr %/ action +Embed an EOF action into final states. +.TP +.I expr @/ action +Embed an EOF action into all states that are not final.
+.TP +.I expr <>/ action +Embed an EOF action into all states that are not the start +state and that are not final (middle states). +.LP +.B GROUP 6: Global Error Actions +.LP +Global error actions are stored in states until the final state machine has +been fully constructed. They are then transferred to error transitions, giving +the effect of a default action. +.TP +.I expr >! action +Embed a global error action into the start state. +.TP +.I expr <! action +Embed a global error action into all states except the start state. +.TP +.I expr $! action +Embed a global error action into all states. +.TP +.I expr %! action +Embed a global error action into the final states. +.TP +.I expr @! action +Embed a global error action into all states which are not final. +.TP +.I expr <>! action +Embed a global error action into all states which are not the start state and +are not final (middle states). +.LP +.B GROUP 6: Local Error Actions +.LP +Local error actions are stored in states until the named machine is fully +constructed. They are then transferred to error transitions, giving the effect +of a default action for a section of the total machine. Note that the name may +be omitted, in which case the action will be transferred to error actions upon +construction of the current machine. +.TP +.I expr >^ action +Embed a local error action into the start state. +.TP +.I expr <^ action +Embed a local error action into all states except the start state. +.TP +.I expr $^ action +Embed a local error action into all states. +.TP +.I expr %^ action +Embed a local error action into the final states. +.TP +.I expr @^ action +Embed a local error action into all states which are not final. +.TP +.I expr <>^ action +Embed a local error action into all states which are not the start state and +are not final (middle states). +.LP +.B GROUP 6: To-State Actions +.LP +To state actions are stored in states and executed any time the machine moves +into a state. 
This includes regular transitions, and transfers of control such +as fgoto. Note that setting the current state from outside the machine (for +example during initialization) does not count as a transition into a state. +.TP +.I expr >~ action +Embed a to-state action into the start state. +.TP +.I expr <~ action +Embed a to-state action into all states except the start state. +.TP +.I expr $~ action +Embed a to-state action into all states. +.TP +.I expr %~ action +Embed a to-state action into the final states. +.TP +.I expr @~ action +Embed a to-state action into all states which are not final. +.TP +.I expr <>~ action +Embed a to-state action into all states which are not the start state and +are not final (middle states). +.LP +.B GROUP 6: From-State Actions +.LP +From state actions are executed whenever a state takes a transition on a character. +This includes the error transition and a transition to self. +.TP +.I expr >* action +Embed a from-state action into the start state. +.TP +.I expr <* action +Embed a from-state action into every state except the start state. +.TP +.I expr $* action +Embed a from-state action into all states. +.TP +.I expr %* action +Embed a from-state action into the final states. +.TP +.I expr @* action +Embed a from-state action into all states which are not final. +.TP +.I expr <>* action +Embed a from-state action into all states which are not the start state and +are not final (middle states). +.LP +.B GROUP 6: Priority Assignment +.LP +Priorities are assigned to names within transitions. Only priorities on the +same name are allowed to interact. In the first form of priorities the name +defaults to the name of the machine definition the priority is assigned in. +Transitions do not have default priorities. +.TP +.I expr > int +Assigns the priority int in all transitions entering into the machine. +.TP +.I expr @ int +Assigns the priority int in all transitions that go into a final state.
+.TP +.I expr $ int +Assigns the priority int in all existing transitions. +.TP +.I expr % int +Assigns the priority int in all pending out transitions. +.LP +A second form of priority assignment allows the programmer to specify the name +to which the priority is assigned, allowing interactions to cross machine +definition boundaries. +.TP +.I expr > (name,int) +Assigns the priority int to name in all transitions entering into the machine. +.TP +.I expr @ (name, int) +Assigns the priority int to name in all transitions that go into a final state. +.TP +.I expr $ (name, int) +Assigns the priority int to name in all existing transitions. +.TP +.I expr % (name, int) +Assigns the priority int to name in all pending out transitions. +.LP +.B GROUP 7: +.TP +.I expr * +Produces the kleene star of a machine. Matches zero or more repetitions of the +machine. +.TP +.I expr ** +Longest Matching Kleene Star. This version of kleene star puts a higher +priority on staying in the machine over wrapping around and starting over. This +operator is equivalent to ( ( expr ) $1 %0 )*. +.TP +.I expr ? +Produces a machine that accepts the machine given or the null string. This operator +is equivalent to ( expr | '' ). +.TP +.I expr + +Produces the machine concatenated with the kleene star of itself. Matches one or +more repetitions of the machine. This operator is equivalent to ( expr . expr* ). +.TP +.I expr {n} +Produces a machine that matches exactly n repetitions of expr. +.TP +.I expr {,n} +Produces a machine that matches anywhere from zero to n repetitions of expr. +.TP +.I expr {n,} +Produces a machine that matches n or more repetitions of expr. +.TP +.I expr {n,m} +Produces a machine that matches n to m repetitions of expr. +.LP +.B GROUP 8: +.TP +.I ! expr +Produces a machine that matches any string not matched by the given machine. +This operator is equivalent to ( *extend - expr ). +.LP +.B GROUP 9: +.TP +.I ( expr ) +Forces precedence on operators.
+.SH VALUES AVAILABLE IN CODE BLOCKS +.TP +.I fc +The current character. Equivalent to *p. +.TP +.I fpc +A pointer to the current character. Equivalent to p. +.TP +.I fcurs +An integer value representing the current state. +.TP +.I ftargs +An integer value representing the target state. +.TP +.I fentry(<label>) +An integer value representing the entry point <label>. +.SH STATEMENTS AVAILABLE IN CODE BLOCKS +.TP +.I fhold; +Do not advance over the current character. Equivalent to --p;. +.TP +.I fexec <expr>; +Sets the current character to something else. Equivalent to p = (<expr>)-1; +.TP +.I fgoto <label>; +Jump to the machine defined by <label>. +.TP +.I fgoto *<expr>; +Jump to the entry point given by <expr>. The expression must +evaluate to an integer value representing a state. +.TP +.I fnext <label>; +Set the next state to be the entry point defined by <label>. The fnext +statement does not immediately jump to the specified state. Any action code +following the statement is executed. +.TP +.I fnext *<expr>; +Set the next state to be the entry point given by <expr>. The expression must +evaluate to an integer value representing a state. +.TP +.I fcall <label>; +Call the machine defined by <label>. The next fret will jump to the +target of the transition on which the action is invoked. +.TP +.I fcall *<expr>; +Call the entry point given by <expr>. The next fret will jump to the target of +the transition on which the action is invoked. +.TP +.I fret; +Return to the target state of the transition on which the last fcall was made. +.TP +.I fbreak; +Save the current state and immediately break out of the machine. +.SH BUGS +Ragel is still under development and has not yet matured. There are probably +many bugs. +.SH CREDITS +Ragel was written by Adrian Thurston <thurston@cs.queensu.ca>. Objective-C +output contributed by Eric Ocean. D output contributed by Alan West. 
+.SH "SEE ALSO" +.BR rlcodegen (1), +.BR re2c (1), +.BR flex (1) + +Homepage: http://www.cs.queensu.ca/home/thurston/ragel/ diff --git a/doc/rlcodegen.1.in b/doc/rlcodegen.1.in new file mode 100644 index 0000000..516229d --- /dev/null +++ b/doc/rlcodegen.1.in @@ -0,0 +1,107 @@ +.\" +.\" Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca> +.\" + +.\" This file is part of Ragel. +.\" +.\" Ragel is free software; you can redistribute it and/or modify +.\" it under the terms of the GNU General Public License as published by +.\" the Free Software Foundation; either version 2 of the License, or +.\" (at your option) any later version. +.\" +.\" Ragel is distributed in the hope that it will be useful, +.\" but WITHOUT ANY WARRANTY; without even the implied warranty of +.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +.\" GNU General Public License for more details. +.\" +.\" You should have received a copy of the GNU General Public License +.\" along with Ragel; if not, write to the Free Software +.\" Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +.\" Process this file with +.\" groff -man -Tascii rlcodegen.1 +.\" +.TH RAGEL 1 "@PUBDATE@" "Ragel @VERSION@" "Ragel State Machine Compiler" +.SH NAME +rlcodegen \- code generator for Ragel State Machine Compiler +.SH SYNOPSIS +.B rlcodegen +.RI [ options ] +.I file +.SH DESCRIPTION +.B Note: +this is the backend component of Ragel. This program accepts a machine +compiled by the frontend program ragel(1) and generates either code or a +graphviz dot file. + +.SH OPTIONS +.TP +.BR \-h ", " \-H ", " \-? ", " \-\-help +Display help and exit. +.TP +.B \-o " file" +Write output to file. If -o is not given, a default file name is chosen by +replacing the suffix of the input. For source files ending in .rh the suffix .h +is used. For all other source files a suffix based on the output language +is used (.c, .cpp, .m, .dot) +.TP +.B \-V +Generate a Graphviz dotfile instead of code. 
By default this option writes the +dotfile to standard output. The frontend options -M and -S can be used +to specify a subset of the grammar to write. +.TP +.B \-p +Print printable characters in Graphviz output. +.TP +.B \-T0 +Generate a table driven FSM. This is the default code style. The table driven +FSM represents the state machine as static data. There are tables of states, +transitions, indices and actions. The current state is stored in a variable. +The execution is a loop that, given the current state and the current +character to process, looks up the transition to take using a binary search, +executes any actions and moves to the target state. In general, the table +driven FSM produces a smaller binary and requires a less expensive host language +compile but results in slower running code. The table driven FSM is suitable +for any FSM. +.TP +.B \-T1 +Generate a faster table driven FSM by expanding action lists in the action +execute code. +.TP +.B \-F0 +Generate a flat table driven FSM. Transitions are represented as an array +indexed by the current alphabet character. This eliminates the need for a +binary search to locate transitions and produces faster code; however, it is +only suitable for small alphabets. +.TP +.B \-F1 +Generate a faster flat table driven FSM by expanding action lists in the action +execute code. +.TP +.B \-G0 +Generate a goto driven FSM. The goto driven FSM represents the state machine +as a series of goto statements. While in the machine, the current state is +stored by the processor's instruction pointer. The execution is a flat function +where control is passed from state to state using gotos. In general, the goto +FSM produces faster code but results in a larger binary and a more expensive +host language compile. +.TP +.B \-G1 +Generate a faster goto driven FSM by expanding action lists in the action +execute code. 
+.TP +.B \-G2 +Generate a really fast goto driven FSM by embedding action lists in the state +machine control code. +.SH BUGS +Ragel is still under development and has not yet matured. There are probably +many bugs. +.SH CREDITS +Ragel was written by Adrian Thurston <thurston@cs.queensu.ca>. Objective-C +output contributed by Eric Ocean. D output contributed by Alan West. +.SH "SEE ALSO" +.BR ragel (1), +.BR re2c (1), +.BR flex (1) + +Homepage: http://www.cs.queensu.ca/home/thurston/ragel/ diff --git a/doc/stembed.fig b/doc/stembed.fig new file mode 100644 index 0000000..eb3ce8d --- /dev/null +++ b/doc/stembed.fig @@ -0,0 +1,72 @@ +#FIG 3.2 Produced by xfig version 3.2.5-alpha5 +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 463 1772 463 1875 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 955 1772 955 1875 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 1461 1772 1461 1875 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 1948 1772 1948 1875 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 2403 1772 2403 1875 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 2906 1772 2906 1875 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 3377 173 3510 173 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 3377 881 3510 881 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 3377 532 3510 532 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 3377 1609 3510 1609 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 3377 1260 3510 1260 +4 0 0 50 -1 12 12 0.0000 4 105 240 405 225 >~\001 +4 0 0 50 -1 0 12 0.0000 4 150 1545 3690 585 from-state actions\001 +4 0 0 50 -1 0 12 0.0000 4 150 1290 3690 225 to state actions\001 +4 0 0 50 -1 0 12 0.0000 4 150 1545 3690 1665 local error actions\001 +4 0 0 50 -1 0 12 0.0000 4 150 1095 3690 1305 error actions\001 +4 0 0 50 -1 0 12 0.0000 4 150 1065 3690 945 EOF actions\001 +4 0 0 50 -1 0 12 5.6723 4 120 855 405 2044 start state\001 +4 0 0 50 -1 0 12 5.6723 4 150 360 1409 2071 final\001 +4 0 0 50 -1 0 12 5.6723 4 150 750 901 2038 all states\001 +4 0 0 50 -1 12 
12 0.0000 4 165 240 900 225 $~\001 +4 0 0 50 -1 12 12 0.0000 4 120 240 1395 225 %~\001 +4 0 0 50 -1 12 12 0.0000 4 105 240 1890 225 <~\001 +4 0 0 50 -1 12 12 0.0000 4 135 360 2835 225 <>~\001 +4 0 0 50 -1 12 12 0.0000 4 120 360 405 585 >* \001 +4 0 0 50 -1 12 12 0.0000 4 165 240 900 585 $*\001 +4 0 0 50 -1 12 12 0.0000 4 135 360 2835 585 <>*\001 +4 0 0 50 -1 12 12 0.0000 4 120 240 405 1305 >!\001 +4 0 0 50 -1 12 12 0.0000 4 150 240 405 945 >/\001 +4 0 0 50 -1 12 12 0.0000 4 165 240 900 945 $/\001 +4 0 0 50 -1 12 12 0.0000 4 120 240 1395 585 %*\001 +4 0 0 50 -1 12 12 0.0000 4 150 240 1395 945 %/\001 +4 0 0 50 -1 12 12 0.0000 4 150 240 1890 945 </\001 +4 0 0 50 -1 12 12 0.0000 4 150 240 2340 945 @/\001 +4 0 0 50 -1 12 12 0.0000 4 150 360 2835 945 <>/\001 +4 0 0 50 -1 12 12 0.0000 4 105 240 1890 585 <*\001 +4 0 0 50 -1 12 12 0.0000 4 120 240 1890 1305 <!\001 +4 0 0 50 -1 12 12 0.0000 4 120 240 1395 1305 %!\001 +4 0 0 50 -1 12 12 0.0000 4 165 240 900 1305 $!\001 +4 0 0 50 -1 12 12 0.0000 4 165 240 900 1665 $^\001 +4 0 0 50 -1 12 12 0.0000 4 120 240 405 1665 >^\001 +4 0 0 50 -1 12 12 0.0000 4 135 240 1395 1665 %^\001 +4 0 0 50 -1 12 12 0.0000 4 120 240 1890 1665 <^\001 +4 0 0 50 -1 12 12 0.0000 4 135 240 2340 1665 @^\001 +4 0 0 50 -1 12 12 0.0000 4 135 360 2835 1665 <>^\001 +4 0 0 50 -1 12 12 0.0000 4 135 240 2340 1305 @!\001 +4 0 0 50 -1 12 12 0.0000 4 135 360 2835 1305 <>!\001 +4 0 0 50 -1 12 12 0.0000 4 135 240 2340 585 @*\001 +4 0 0 50 -1 12 12 0.0000 4 135 240 2340 225 @~\001 +4 0 0 50 -1 0 12 5.6723 4 150 1635 2860 2053 not start & not final\001 +4 0 0 50 -1 0 12 5.6723 4 120 705 1883 2050 not start\001 +4 0 0 50 -1 0 12 5.6723 4 150 675 2359 2048 not final\001 diff --git a/examples/Makefile b/examples/Makefile new file mode 100644 index 0000000..e1e7808 --- /dev/null +++ b/examples/Makefile @@ -0,0 +1,37 @@ +# +# Copyright 2002-2003 Adrian Thurston <thurston@cs.queensu.ca> +# + +# This file is part of Ragel. 
+# +# Ragel is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Ragel is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ragel; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +SUBDIRS = \ + atoi awkemu clang concurrent format gotocallret mailbox params rlscan \ + statechart cppscan + +all: + @for dir in $(SUBDIRS); do cd $$dir; $(MAKE) || exit 1; cd ..; done + +ps: + @for dir in $(SUBDIRS); do cd $$dir; $(MAKE) ps || exit 1; cd ..; done + +clean: + @for dir in $(SUBDIRS); do cd $$dir; $(MAKE) clean || exit 1; cd ..; done + +distclean: + @for dir in $(SUBDIRS); do cd $$dir; $(MAKE) distclean || exit 1; cd ..; done + diff --git a/examples/README b/examples/README new file mode 100644 index 0000000..12773cb --- /dev/null +++ b/examples/README @@ -0,0 +1,40 @@ + + Ragel State Machine Compiler -- Examples + ======================================== + +atoi -- Converts a string to an integer. + +awkemu -- Performs the basic parsing that the awk program performs on input. + The awk equivalent to awkemu is in awkemu/awkequiv.awk + +clang -- A scanner for a simple C-like language. It breaks input up into + words, numbers, strings and symbols and strips out whitespace + and comments. It is a suitable template for writing a parser + that finds a sequence of tokens. + +concurrent -- Demonstrates the ability of ragel to produce parsers that + perform independent tasks concurrently. + +cppscan -- A C++ scanner that uses the longest match scanning method. 
This + example differs from other examples of scanning. Each run of the + state machine matches one token. This method results in a + smaller state machine since the final kleene star is omitted and + therefore every state does not need to get all the transitions + of the start state. + +format -- Partial printf implementation. + +gotocallret -- Demonstrate the use of fgoto, fcall and fret. + +mailbox -- Parses unix mailbox files. It breaks files into messages, and + messages into headers and body. It demonstrates Ragel's ability + to make parsers for structured file formats. + +params -- Parses command line arguments. + +rlscan -- Lexes Ragel input files. + +statechart -- Demonstrate the use of labels, the epsilon operator, and the + join operator for creating machines using the named state and + transition list paradigm. This implements the same machine as + the atoi example. diff --git a/examples/atoi/Makefile b/examples/atoi/Makefile new file mode 100644 index 0000000..901de19 --- /dev/null +++ b/examples/atoi/Makefile @@ -0,0 +1,21 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: atoi + +ps: atoi.ps + +atoi: atoi.o + g++ -g -o atoi atoi.o + +atoi.cpp: atoi.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) atoi.rl | $(RLCODEGEN) -G2 -o atoi.cpp + +atoi.o: atoi.cpp + g++ -Wall -g -c -O3 -o $@ $< + +atoi.ps: atoi.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) atoi.rl | $(RLCODEGEN) -V | dot -Tps > atoi.ps + +distclean clean: + rm -Rf *.o atoi.cpp atoi atoi.ps diff --git a/examples/atoi/atoi.rl b/examples/atoi/atoi.rl new file mode 100644 index 0000000..0d354a0 --- /dev/null +++ b/examples/atoi/atoi.rl @@ -0,0 +1,60 @@ +/* + * Convert a string to an integer. 
+ */ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> + +using namespace std; + +%%{ + machine atoi; + write data noerror; +}%% + +int atoi( char *str ) +{ + char *p = str; + int cs, val = 0; + bool neg = false;; + + %%{ + action see_neg { + neg = true; + } + + action add_digit { + val = val * 10 + (fc - '0'); + } + + main := + ( '-'@see_neg | '+' )? ( digit @add_digit )+ + '\n' @{ fbreak; }; + + # Inintialize and execute. + write init; + write exec noend; + }%% + + if ( neg ) + val = -1 * val; + + if ( cs < atoi_first_final ) + cerr << "atoi: there was an error" << endl; + + return val; +}; + + +#define BUFSIZE 1024 + +int main() +{ + char buf[BUFSIZE]; + while ( fgets( buf, sizeof(buf), stdin ) != 0 ) { + int value = atoi( buf ); + cout << value << endl; + } + return 0; +} diff --git a/examples/awkemu/Makefile b/examples/awkemu/Makefile new file mode 100644 index 0000000..5e6ecde --- /dev/null +++ b/examples/awkemu/Makefile @@ -0,0 +1,21 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: awkemu + +ps: awkemu.ps + +awkemu: awkemu.o + gcc -g -o awkemu awkemu.o + +awkemu.c: awkemu.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) awkemu.rl | $(RLCODEGEN) -G2 -o awkemu.c + +awkemu.ps: awkemu.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) awkemu.rl | $(RLCODEGEN) -V | dot -Tps > awkemu.ps + +%.o: %.c + gcc -pedantic -Wall -g -c -O3 -o $@ $< + +distclean clean: + rm -Rf *.o awkemu.c awkemu awkemu.ps diff --git a/examples/awkemu/awkemu.rl b/examples/awkemu/awkemu.rl new file mode 100644 index 0000000..6615943 --- /dev/null +++ b/examples/awkemu/awkemu.rl @@ -0,0 +1,116 @@ +/* + * Perform the basic line parsing of input performed by awk. 
+ */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> + +%%{ + machine awkemu; + + action start_word { + ws[nwords] = fpc; + } + + action end_word { + we[nwords++] = fpc; + } + + action start_line { + nwords = 0; + ls = fpc; + } + + action end_line { + printf("endline(%i): ", nwords ); + fwrite( ls, 1, p - ls, stdout ); + printf("\n"); + + for ( i = 0; i < nwords; i++ ) { + printf(" word: "); + fwrite( ws[i], 1, we[i] - ws[i], stdout ); + printf("\n"); + } + } + + # Words in a line. + word = ^[ \t\n]+; + + # The whitespace separating words in a line. + whitespace = [ \t]; + + # The components in a line to break up. Either a word or a single char of + # whitespace. On the word capture characters. + blineElements = word >start_word %end_word | whitespace; + + # Star the break line elements. Just be careful to decrement the leaving + # priority as we don't want multiple character identifiers to be treated as + # multiple single char identifiers. + line = ( blineElements** '\n' ) >start_line @end_line; + + # Any number of lines. + main := line*; +}%% + +%% write data noerror nofinal; + +#define MAXWORDS 256 +#define BUFSIZE 4096 +char buf[BUFSIZE]; + +int main() +{ + int i, nwords = 0; + char *ls = 0; + char *ws[MAXWORDS]; + char *we[MAXWORDS]; + + int cs; + int have = 0; + + %% write init; + + while ( 1 ) { + char *p, *pe, *data = buf + have; + int len, space = BUFSIZE - have; + /* fprintf( stderr, "space: %i\n", space ); */ + + if ( space == 0 ) { + fprintf(stderr, "buffer out of space\n"); + exit(1); + } + + len = fread( data, 1, space, stdin ); + /* fprintf( stderr, "len: %i\n", len ); */ + if ( len == 0 ) + break; + + /* Find the last newline by searching backwards. This is where + * we will stop processing on this iteration. 
*/ + p = buf; + pe = buf + have + len - 1; + while ( *pe != '\n' && pe >= buf ) + pe--; + pe += 1; + + /* fprintf( stderr, "running on: %i\n", pe - p ); */ + + %% write exec; + + /* How much is still in the buffer. */ + have = data + len - pe; + if ( have > 0 ) + memmove( buf, pe, have ); + + /* fprintf(stderr, "have: %i\n", have ); */ + + if ( len < space ) + break; + } + + if ( have > 0 ) + fprintf(stderr, "input not newline terminated\n"); + return 0; +} diff --git a/examples/awkemu/awkequiv.awk b/examples/awkemu/awkequiv.awk new file mode 100755 index 0000000..9877dd3 --- /dev/null +++ b/examples/awkemu/awkequiv.awk @@ -0,0 +1,10 @@ +#!/usr/bin/awk -f +# + + +{ + print "endline(" NF "): " $0 + for ( i = 1; i <= NF; i++ ) { + print " word: " $i + } +} diff --git a/examples/clang/Makefile b/examples/clang/Makefile new file mode 100644 index 0000000..d305406 --- /dev/null +++ b/examples/clang/Makefile @@ -0,0 +1,21 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: clang + +ps: clang.ps + +clang: clang.o + gcc -g -o clang clang.o + +clang.c: clang.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) clang.rl | $(RLCODEGEN) -G2 -o clang.c + +clang.ps: clang.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) clang.rl | $(RLCODEGEN) -V | dot -Tps > clang.ps + +%.o: %.c + gcc -pedantic -Wall -O3 -g -c -o $@ $< + +distclean clean: + rm -Rf *.o clang.c clang clang.ps diff --git a/examples/clang/clang.rl b/examples/clang/clang.rl new file mode 100644 index 0000000..7ecfeef --- /dev/null +++ b/examples/clang/clang.rl @@ -0,0 +1,150 @@ +/* + * A mini C-like language scanner. + */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> + +%%{ + machine clang; + + newline = '\n' @{curline += 1;}; + any_count_line = any | newline; + + # Consume a C comment. + c_comment := any_count_line* :>> '*/' @{fgoto main;}; + + main := |* + + # Alpha numberic characters or underscore. + alnum_u = alnum | '_'; + + # Alpha charactres or underscore. + alpha_u = alpha | '_'; + + # Symbols. 
Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving dump the symbol. + ( punct - [_'"] ) { + printf( "symbol(%i): %c\n", curline, tokstart[0] ); + }; + + # Identifier. Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving, dump the identifier. + alpha_u alnum_u* { + printf( "ident(%i): ", curline ); + fwrite( tokstart, 1, tokend-tokstart, stdout ); + printf("\n"); + }; + + # Single Quote. + sliteralChar = [^'\\] | newline | ( '\\' . any_count_line ); + '\'' . sliteralChar* . '\'' { + printf( "single_lit(%i): ", curline ); + fwrite( tokstart, 1, tokend-tokstart, stdout ); + printf("\n"); + }; + + # Double Quote. + dliteralChar = [^"\\] | newline | ( '\\' any_count_line ); + '"' . dliteralChar* . '"' { + printf( "double_lit(%i): ", curline ); + fwrite( tokstart, 1, tokend-tokstart, stdout ); + printf("\n"); + }; + + # Whitespace is standard ws, newlines and control codes. + any_count_line - 0x21..0x7e; + + # Describe both c style comments and c++ style comments. The + # priority bump on tne terminator of the comments brings us + # out of the extend* which matches everything. + '//' [^\n]* newline; + + '/*' { fgoto c_comment; }; + + # Match an integer. We don't bother clearing the buf or filling it. + # The float machine overlaps with int and it will do it. + digit+ { + printf( "int(%i): ", curline ); + fwrite( tokstart, 1, tokend-tokstart, stdout ); + printf("\n"); + }; + + # Match a float. Upon entering the machine clear the buf, buffer + # characters on every trans and dump the float upon leaving. + digit+ '.' digit+ { + printf( "float(%i): ", curline ); + fwrite( tokstart, 1, tokend-tokstart, stdout ); + printf("\n"); + }; + + # Match a hex. Upon entering the hex part, clear the buf, buffer characters + # on every trans and dump the hex on leaving transitions. 
+ '0x' xdigit+ { + printf( "hex(%i): ", curline ); + fwrite( tokstart, 1, tokend-tokstart, stdout ); + printf("\n"); + }; + + *|; +}%% + +%% write data nofinal; + +#define BUFSIZE 128 + +void scanner() +{ + static char buf[BUFSIZE]; + int cs, act, have = 0, curline = 1; + char *tokstart, *tokend = 0; + int done = 0; + + %% write init; + + while ( !done ) { + char *p = buf + have, *pe; + int len, space = BUFSIZE - have; + + if ( space == 0 ) { + /* We've used up the entire buffer storing an already-parsed token + * prefix that must be preserved. */ + fprintf(stderr, "OUT OF BUFFER SPACE\n" ); + exit(1); + } + + len = fread( p, 1, space, stdin ); + + /* If this is the last buffer, tack on an EOF. */ + if ( len < space ) { + p[len++] = 0; + done = 1; + } + + pe = p + len; + %% write exec; + + if ( cs == clang_error ) { + fprintf(stderr, "PARSE ERROR\n" ); + break; + } + + if ( tokstart == 0 ) + have = 0; + else { + /* There is a prefix to preserve, shift it over. */ + have = pe - tokstart; + memmove( buf, tokstart, have ); + tokend = buf + (tokend-tokstart); + tokstart = buf; + } + } +} + +int main() +{ + scanner(); + return 0; +} + diff --git a/examples/concurrent/Makefile b/examples/concurrent/Makefile new file mode 100644 index 0000000..b9a09f6 --- /dev/null +++ b/examples/concurrent/Makefile @@ -0,0 +1,21 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: concurrent + +ps: concurrent.ps + +concurrent: concurrent.o + g++ -g -o concurrent concurrent.o + +concurrent.cpp: concurrent.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) concurrent.rl | $(RLCODEGEN) -G2 -o concurrent.cpp + +concurrent.ps: concurrent.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) concurrent.rl | $(RLCODEGEN) -V | dot -Tps > concurrent.ps + +%.o: %.cpp + g++ -Wall -g -c -O3 -o $@ $< + +distclean clean: + rm -Rf *.o concurrent.cpp concurrent concurrent.ps diff --git a/examples/concurrent/concurrent.rl b/examples/concurrent/concurrent.rl new file mode 100644 index 0000000..b70fd5d --- 
/dev/null +++ b/examples/concurrent/concurrent.rl @@ -0,0 +1,126 @@ +/* + * Show off concurrent abilities. + */ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> + +using namespace std; + +#define BUFSIZE 2048 + +struct Concurrent +{ + int cur_char; + int start_word; + int start_comment; + int start_literal; + + int cs; + + int init( ); + int execute( const char *data, int len ); + int finish( ); +}; + +%%{ + machine Concurrent; + + action next_char { + cur_char += 1; + } + + action start_word { + start_word = cur_char; + } + action end_word { + cout << "word: " << start_word << + " " << cur_char-1 << endl; + } + + action start_comment { + start_comment = cur_char; + } + action end_comment { + cout << "comment: " << start_comment << + " " << cur_char-1 << endl; + } + + action start_literal { + start_literal = cur_char; + } + action end_literal { + cout << "literal: " << start_literal << + " " << cur_char-1 << endl; + } + + # Count characters. + chars = ( any @next_char )*; + + # Words are non-whitespace. + word = ( any-space )+ >start_word %end_word; + words = ( ( word | space ) $1 %0 )*; + + # Finds C style comments. + comment = ( '/*' any* :>> '*/' ) >start_comment %end_comment; + comments = ( comment | any )**; + + # Finds single quoted strings. + literalChar = ( any - ['\\] ) | ( '\\' . 
any ); + literal = ('\'' literalChar* '\'' ) >start_literal %end_literal; + literals = ( ( literal | (any-'\'') ) $1 %0 )*; + + main := chars | words | comments | literals; +}%% + +%% write data; + +int Concurrent::init( ) +{ + %% write init; + cur_char = 0; + return 1; +} + +int Concurrent::execute( const char *data, int len ) +{ + const char *p = data; + const char *pe = data + len; + + %% write exec; + + if ( cs == Concurrent_error ) + return -1; + if ( cs >= Concurrent_first_final ) + return 1; + return 0; +} + +int Concurrent::finish( ) +{ + %% write eof; + if ( cs == Concurrent_error ) + return -1; + if ( cs >= Concurrent_first_final ) + return 1; + return 0; +} + +Concurrent concurrent; +char buf[BUFSIZE]; + +int main() +{ + concurrent.init(); + while ( 1 ) { + int len = fread( buf, 1, BUFSIZE, stdin ); + concurrent.execute( buf, len ); + if ( len != BUFSIZE ) + break; + } + + if ( concurrent.finish() <= 0 ) + cerr << "concurrent: error parsing input" << endl; + return 0; +} diff --git a/examples/cppscan/Makefile b/examples/cppscan/Makefile new file mode 100644 index 0000000..6a92c82 --- /dev/null +++ b/examples/cppscan/Makefile @@ -0,0 +1,41 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen +FLEX = flex +RE2C = re2c + +CFLAGS = -Wall -g -O3 + +all: cppscan lex-cppscan re2c-cppscan + +ps: cppscan.ps + +cppscan: cppscan.o + g++ -g -o $@ $< + +lex-cppscan: lex-cppscan.o + g++ -g -o $@ $< + +re2c-cppscan: re2c-cppscan.o + g++ -g -o $@ $< + +cppscan.cpp: cppscan.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) cppscan.rl | $(RLCODEGEN) -G2 -o $@ + +lex-cppscan.cpp: cppscan.lex + $(FLEX) -f -o $@ $< + +re2c-cppscan.cpp: cppscan.rec + $(RE2C) -s $< > $@ + +example.cpp: example.rec + $(RE2C) -s $< > $@ + +%.o: %.cpp + g++ $(CFLAGS) -c -o $@ $< + +cppscan.ps: cppscan.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) cppscan.rl | $(RLCODEGEN) -V | dot -Tps > cppscan.ps + +distclean clean: + rm -Rf *.o cppscan.cpp cppscan cppscan.ps \ + lex-cppscan lex-cppscan.cpp 
re2c-cppscan re2c-cppscan.cpp diff --git a/examples/cppscan/cppscan.lex b/examples/cppscan/cppscan.lex new file mode 100644 index 0000000..fb66253 --- /dev/null +++ b/examples/cppscan/cppscan.lex @@ -0,0 +1,143 @@ +/* + * flex equivalent to cppscan.rl + */ + +%{ + +#include <stdio.h> + +#define TK_Dlit 256 +#define TK_Slit 257 +#define TK_Float 258 +#define TK_Id 259 +#define TK_NameSep 260 +#define TK_Arrow 261 +#define TK_PlusPlus 262 +#define TK_MinusMinus 263 +#define TK_ArrowStar 264 +#define TK_DotStar 265 +#define TK_ShiftLeft 266 +#define TK_ShiftRight 267 +#define TK_IntegerDecimal 268 +#define TK_IntegerOctal 269 +#define TK_IntegerHex 270 +#define TK_EqualsEquals 271 +#define TK_NotEquals 272 +#define TK_AndAnd 273 +#define TK_OrOr 274 +#define TK_MultAssign 275 +#define TK_DivAssign 276 +#define TK_PercentAssign 277 +#define TK_PlusAssign 278 +#define TK_MinusAssign 279 +#define TK_AmpAssign 280 +#define TK_CaretAssign 281 +#define TK_BarAssign 282 +#define TK_DotDotDot 283 +#define TK_Whitespace 284 +#define TK_Comment 285 + +int line = 1, col = 1; + +void token( int tok, char *data, int len ) +{ + printf( "<%i> ", tok ); + for ( int i = 0; i < len; i++ ) + fputc( data[i], stdout ); + fputc( '\n', stdout ); + + /* Count newlines and columns. This code is here mainly for having some + * code in the token routine when commenting out the above output during + * performance testing. */ + for ( int i = 0; i < len; i ++ ) { + if ( data[i] == '\n' ) { + line += 1; + col = 1; + } + else { + col += 1; + } + } +} + + +%} + +%x COMMENT + +FRACT_CONST [0-9]*\.[0-9]+|[0-9]+\. +EXPONENT [eE][+\-]?[0-9]+ +FLOAT_SUFFIX [flFL] + +%% + + /* Single and double literals. */ +L?\'([^\'\\\n]|\\.)*\' { + token( TK_Slit, yytext, yyleng ); +} + +L?\"([^\"\\\n]|\\.)*\" { + token( TK_Dlit, yytext, yyleng ); +} + +[a-zA-Z_][a-zA-Z0-9_]* { + token( TK_Id, yytext, yyleng ); +} + +{FRACT_CONST}{EXPONENT}?{FLOAT_SUFFIX}?|[0-9]+{EXPONENT}{FLOAT_SUFFIX}? 
{ + token( TK_Float, yytext, yyleng ); +} + +(0|[1-9][0-9]*)[ulUL]{0,3} { + token( TK_IntegerDecimal, yytext, yyleng ); +} + +0[0-9]+[ulUL]{0,2} { + token( TK_IntegerOctal, yytext, yyleng ); +} + +0x[0-9a-fA-F]+[ulUL]{0,2} { + token( TK_IntegerHex, yytext, yyleng ); +} + +:: token( TK_NameSep, yytext, yyleng ); +== token( TK_EqualsEquals, yytext, yyleng ); +!= token( TK_NotEquals, yytext, yyleng ); +&& token( TK_AndAnd, yytext, yyleng ); +\|\| token( TK_OrOr, yytext, yyleng ); +\*= token( TK_MultAssign, yytext, yyleng ); +\/= token( TK_DivAssign, yytext, yyleng ); +%= token( TK_PercentAssign, yytext, yyleng ); +\+= token( TK_PlusAssign, yytext, yyleng ); +-= token( TK_MinusAssign, yytext, yyleng ); +&= token( TK_AmpAssign, yytext, yyleng ); +^= token( TK_CaretAssign, yytext, yyleng ); +\|= token( TK_BarAssign, yytext, yyleng ); +\+\+ token( TK_PlusPlus, yytext, yyleng ); +-- token( TK_MinusMinus, yytext, yyleng ); +-> token( TK_Arrow, yytext, yyleng ); +->\* token( TK_ArrowStar, yytext, yyleng ); +\.\* token( TK_DotStar, yytext, yyleng ); +\.\.\. token( TK_DotDotDot, yytext, yyleng ); + +\/\* BEGIN(COMMENT); +<COMMENT>\*\/ BEGIN(INITIAL); +<COMMENT>(.|\n) { } + +\/\/.*\n {} +[^!-~]+ {} + +[!-/:-@\[-`{-~] token( yytext[0], yytext, yyleng ); + +%% + +int yywrap() +{ + /* Once the input is done, no more. 
*/ + return 1; +} + +int main() +{ + yylex(); +} diff --git a/examples/cppscan/cppscan.rec b/examples/cppscan/cppscan.rec new file mode 100644 index 0000000..43f297d --- /dev/null +++ b/examples/cppscan/cppscan.rec @@ -0,0 +1,183 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> + +#define TK_Dlit 256 +#define TK_Slit 257 +#define TK_Float 258 +#define TK_Id 259 +#define TK_NameSep 260 +#define TK_Arrow 261 +#define TK_PlusPlus 262 +#define TK_MinusMinus 263 +#define TK_ArrowStar 264 +#define TK_DotStar 265 +#define TK_ShiftLeft 266 +#define TK_ShiftRight 267 +#define TK_IntegerDecimal 268 +#define TK_IntegerOctal 269 +#define TK_IntegerHex 270 +#define TK_EqualsEquals 271 +#define TK_NotEquals 272 +#define TK_AndAnd 273 +#define TK_OrOr 274 +#define TK_MultAssign 275 +#define TK_DivAssign 276 +#define TK_PercentAssign 277 +#define TK_PlusAssign 278 +#define TK_MinusAssign 279 +#define TK_AmpAssign 280 +#define TK_CaretAssign 281 +#define TK_BarAssign 282 +#define TK_DotDotDot 283 +#define TK_Whitespace 284 +#define TK_Comment 285 + +int line = 1, col = 1; + +void token( int tok, char *data, int len ) +{ + printf( "<%i> ", tok ); + for ( int i = 0; i < len; i++ ) + fputc( data[i], stdout ); + fputc( '\n', stdout ); + + /* Count newlines and columns. This code is here mainly for having some + * code in the token routine when commenting out the above output during + * performance testing. */ + for ( int i = 0; i < len; i ++ ) { + if ( data[i] == '\n' ) { + line += 1; + col = 1; + } + else { + col += 1; + } + } +} + +#define BUFSIZE 8192 +char buf[BUFSIZE]; + +void fill( int n ) +{ + printf("fill(%i)\n", n); + exit(1); +} + +int main() +{ + char *start, *p = buf, *lim = buf, *marker; + int len, have, want, shift; + int done = 0; + +#define YYCTYPE char + +#define YYCURSOR p +#define YYLIMIT lim +#define YYMARKER marker + +#define YYFILL(n) { \ + if ( ! 
done ) { \ + have = lim-start; \ + if ( start > buf ) { \ + shift = start-buf; \ + memmove( buf, start, have ); \ + start -= shift; \ + p -= shift; \ + lim -= shift; \ + marker -= shift; \ + } \ + want = BUFSIZE - have - 1; \ + len = fread( lim, 1, want, stdin ); \ + lim += len; \ + if ( len < want ) { \ + *lim++ = 0; \ + done = 1; \ + } \ + } \ + } + +again: + start = p; + +/*!re2c + +ANY = [\000-\377]; +FRACTCONST = ( [0-9]* "." [0-9]+ ) | [0-9]+ "."; +EXPONENT = [eE] [+\-]? [0-9]+; +FLOATSUFFIX = [flFL]; + + "L"? "\'" ( ANY \ [\'\\\n] | "\\" ANY )* "\'" { + token( TK_Slit, start, p-start ); + goto again; + } + + "L"? "\"" ( ANY \ [\"\\\n] | "\\" ANY )* "\"" { + token( TK_Dlit, start, p-start ); + goto again; + } + + [a-zA-Z_][a-zA-Z0-9_]* { + token( TK_Id, start, p-start ); + goto again; + } + + ( FRACTCONST EXPONENT? FLOATSUFFIX? ) | ( [0-9]+ EXPONENT FLOATSUFFIX? ) { + token( TK_Float, start, p-start ); + goto again; + } + + + ( "0" | [1-9][0-9]* ) [ulUL]* { + token( TK_IntegerDecimal, start, p-start ); + goto again; + } + + "0" [0-9]+ [ulUL]* { + token( TK_IntegerOctal, start, p-start ); + goto again; + } + + "0x" [0-9a-fA-F]+[ulUL]* { + token( TK_IntegerHex, start, p-start ); + goto again; + } + + "::" { token( TK_NameSep, start, p-start ); goto again; } + "==" { token( TK_EqualsEquals, start, p-start ); goto again; } + "!=" { token( TK_NotEquals, start, p-start ); goto again; } + "&&" { token( TK_AndAnd, start, p-start ); goto again; } + "||" { token( TK_OrOr, start, p-start ); goto again; } + "*=" { token( TK_MultAssign, start, p-start ); goto again; } + "/=" { token( TK_DivAssign, start, p-start ); goto again; } + "%=" { token( TK_PercentAssign, start, p-start ); goto again; } + "+=" { token( TK_PlusAssign, start, p-start ); goto again; } + "-=" { token( TK_MinusAssign, start, p-start ); goto again; } + "&=" { token( TK_AmpAssign, start, p-start ); goto again; } + "^=" { token( TK_CaretAssign, start, p-start ); goto again; } + "|=" { token( TK_BarAssign, 
start, p-start ); goto again; } + "++" { token( TK_PlusPlus, start, p-start ); goto again; } + "--" { token( TK_MinusMinus, start, p-start ); goto again; } + "->" { token( TK_Arrow, start, p-start ); goto again; } + "->*" { token( TK_ArrowStar, start, p-start ); goto again; } + ".*" { token( TK_DotStar, start, p-start ); goto again; } + "..." { token( TK_DotDotDot, start, p-start ); goto again; } + + "/*" { goto comment; } + "//" (ANY\"\n")* "\n" { goto again; } + [\001-\040\177]+ { goto again; } + + [\041-\057\072-\100\133-\140\173-\176] { + token( *start, start, p-start ); + goto again; + } + "\000" { return 0; } +*/ + +comment: +/*!re2c + "*/" { goto again; } + ANY { goto comment; } +*/ +} diff --git a/examples/cppscan/cppscan.rl b/examples/cppscan/cppscan.rl new file mode 100644 index 0000000..5c979eb --- /dev/null +++ b/examples/cppscan/cppscan.rl @@ -0,0 +1,207 @@ +/* + * A C++ scanner. Uses the longest match construction. + * << <= <<= >> >= >>= are left out since angle brackets are used in templates. + */ + +#include <string.h> +#include <stdlib.h> +#include <iostream> + +#define TK_Dlit 256 +#define TK_Slit 257 +#define TK_Float 258 +#define TK_Id 259 +#define TK_NameSep 260 +#define TK_Arrow 261 +#define TK_PlusPlus 262 +#define TK_MinusMinus 263 +#define TK_ArrowStar 264 +#define TK_DotStar 265 +#define TK_ShiftLeft 266 +#define TK_ShiftRight 267 +#define TK_IntegerDecimal 268 +#define TK_IntegerOctal 269 +#define TK_IntegerHex 270 +#define TK_EqualsEquals 271 +#define TK_NotEquals 272 +#define TK_AndAnd 273 +#define TK_OrOr 274 +#define TK_MultAssign 275 +#define TK_DivAssign 276 +#define TK_PercentAssign 277 +#define TK_PlusAssign 278 +#define TK_MinusAssign 279 +#define TK_AmpAssign 280 +#define TK_CaretAssign 281 +#define TK_BarAssign 282 +#define TK_DotDotDot 283 +#define TK_Whitespace 284 +#define TK_Comment 285 + +#define BUFSIZE 16384 + +/* EOF char used to flush out that last token. This should be a whitespace + * token. 
*/ + +#define LAST_CHAR 0 + +using std::cerr; +using std::cout; +using std::cin; +using std::endl; + +static char buf[BUFSIZE]; +static int line = 1, col = 1; +static char *tokstart, *tokend; +static int act, have = 0; +static int cs; + +%%{ + machine Scanner; + write data nofinal; + + # Floating literals. + fract_const = digit* '.' digit+ | digit+ '.'; + exponent = [eE] [+\-]? digit+; + float_suffix = [flFL]; + + c_comment := + any* :>> '*/' + @{ fgoto main; }; + + main := |* + + # Single and double literals. + ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) + {token( TK_Slit );}; + ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) + {token( TK_Dlit );}; + + # Identifiers + ( [a-zA-Z_] [a-zA-Z0-9_]* ) + {token( TK_Id );}; + + # Floating literals. + ( fract_const exponent? float_suffix? | digit+ exponent float_suffix? ) + {token( TK_Float );}; + + # Integer decimal. Leading part buffered by float. + ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) + {token( TK_IntegerDecimal );}; + + # Integer octal. Leading part buffered by float. + ( '0' [0-9]+ [ulUL]{0,2} ) + {token( TK_IntegerOctal );}; + + # Integer hex. Leading 0 buffered by float. + ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) ) + {token( TK_IntegerHex );}; + + # Only buffer the second item, first buffered by symbol. */ + '::' {token( TK_NameSep );}; + '==' {token( TK_EqualsEquals );}; + '!=' {token( TK_NotEquals );}; + '&&' {token( TK_AndAnd );}; + '||' {token( TK_OrOr );}; + '*=' {token( TK_MultAssign );}; + '/=' {token( TK_DivAssign );}; + '%=' {token( TK_PercentAssign );}; + '+=' {token( TK_PlusAssign );}; + '-=' {token( TK_MinusAssign );}; + '&=' {token( TK_AmpAssign );}; + '^=' {token( TK_CaretAssign );}; + '|=' {token( TK_BarAssign );}; + '++' {token( TK_PlusPlus );}; + '--' {token( TK_MinusMinus );}; + '->' {token( TK_Arrow );}; + '->*' {token( TK_ArrowStar );}; + '.*' {token( TK_DotStar );}; + + # Three char compounds, first item already buffered. */ + '...' {token( TK_DotDotDot );}; + + # Single char symbols. 
+ ( punct - [_"'] ) {token( tokstart[0] );}; + + # Comments and whitespace. + '/*' { fgoto c_comment; }; + '//' [^\n]* '\n'; + ( any - 33..126 )+; + + *|; +}%% + +void token( int tok ) +{ + char *data = tokstart; + int len = tokend - tokstart; + + cout << '<' << tok << "> "; + cout.write( data, len ); + cout << '\n'; + + /* Count newlines and columns. This code is here mainly for having some + * code in the token routine when commenting out the above output during + * performance testing. */ + for ( int i = 0; i < len; i ++ ) { + if ( data[i] == '\n' ) { + line += 1; + col = 1; + } + else { + col += 1; + } + } +} + +int main() +{ + std::ios::sync_with_stdio(false); + + %% write init; + + /* Do the first read. */ + bool done = false; + while ( !done ) { + char *p = buf + have; + int space = BUFSIZE - have; + + if ( space == 0 ) { + /* We filled up the buffer trying to scan a token. */ + cerr << "OUT OF BUFFER SPACE" << endl; + exit(1); + } + + cin.read( p, space ); + int len = cin.gcount(); + + /* If we see eof then append the EOF char. */ + if ( len == 0 ) { + p[0] = LAST_CHAR, len++; + done = true; + } + + char *pe = p + len; + %% write exec; + + /* Check if we failed. */ + if ( cs == Scanner_error ) { + /* Machine failed before finding a token. */ + cerr << "PARSE ERROR" << endl; + exit(1); + } + + /* Now set up the prefix. */ + if ( tokstart == 0 ) + have = 0; + else { + /* There is data that needs to be shifted over. 
*/ + have = pe - tokstart; + memmove( buf, tokstart, have ); + tokend -= (tokstart-buf); + tokstart = buf; + } + } + + return 0; +} diff --git a/examples/format/Makefile b/examples/format/Makefile new file mode 100644 index 0000000..d5ac829 --- /dev/null +++ b/examples/format/Makefile @@ -0,0 +1,21 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: format + +ps: format.ps + +format: format.o + gcc -g -o format format.o + +format.c: format.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) format.rl | $(RLCODEGEN) -G2 -o format.c + +format.ps: format.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) format.rl | $(RLCODEGEN) -V | dot -Tps > format.ps + +%.o: %.c + gcc -Wall -O3 -g -c -o $@ $< + +distclean clean: + rm -Rf *.o format.c format format.ps diff --git a/examples/format/format.rl b/examples/format/format.rl new file mode 100644 index 0000000..ea5fdfb --- /dev/null +++ b/examples/format/format.rl @@ -0,0 +1,191 @@ +/* + * Partial printf implementation. + */ + +#define BUFLEN 1024 +#include <stdio.h> + +typedef void (*WriteFunc)( char *data, int len ); + +struct format +{ + char buf[BUFLEN+1]; + int buflen; + WriteFunc write; + + int flags; + int width; + int prec; + int cs; +}; + +void do_conv( struct format *fsm, char c ) +{ + printf( "flags: %x\n", fsm->flags ); + printf( "width: %i\n", fsm->width ); + printf( "prec: %i\n", fsm->prec ); + printf( "conv: %c\n", c ); + printf( "\n" ); +} + +#define FL_HASH 0x01 +#define FL_ZERO 0x02 +#define FL_DASH 0x04 +#define FL_SPACE 0x08 +#define FL_PLUS 0x10 + +#define FL_HAS_WIDTH 0x0100 +#define FL_WIDTH_ARG 0x0200 +#define FL_HAS_PREC 0x0400 +#define FL_PREC_ARG 0x0800 + +#define FL_LEN_H 0x010000 +#define FL_LEN_HH 0x020000 +#define FL_LEN_L 0x040000 +#define FL_LEN_LL 0x080000 + +%%{ + machine format; + access fsm->; + + action clear { + fsm->flags = 0; + fsm->width = 0; + fsm->prec = 0; + } + + # A non-zero number. 
+ nznum = [1-9] [0-9]*; + + # Width + action width_num { fsm->width = 10 * fsm->width + (fc-'0'); } + action width_arg { fsm->flags |= FL_WIDTH_ARG; } + action width { fsm->flags |= FL_HAS_WIDTH; } + width = ( ( nznum $width_num | '*' @width_arg ) %width )?; + + # Precision + action prec_num { fsm->prec = 10 * fsm->prec + (fc-'0'); } + action prec_arg { fsm->flags |= FL_PREC_ARG; } + action prec { fsm->flags |= FL_HAS_PREC; } + precision = ( '.' ( digit* $prec_num %prec | '*' @prec_arg ) )?; + + # Flags + action flags_hash { fsm->flags |= FL_HASH; } + action flags_zero { fsm->flags |= FL_ZERO; } + action flags_dash { fsm->flags |= FL_DASH; } + action flags_space { fsm->flags |= FL_SPACE; } + action flags_plus { fsm->flags |= FL_PLUS; } + + flags = ( + '#' @flags_hash | + '0' @flags_zero | + '-' @flags_dash | + ' ' @flags_space | + '+' @flags_plus )*; + + action length_h { fsm->flags |= FL_LEN_H; } + action length_l { fsm->flags |= FL_LEN_L; } + action length_hh { fsm->flags |= FL_LEN_HH; } + action length_ll { fsm->flags |= FL_LEN_LL; } + + # Must use leaving transitions on 'h' and 'l' because they are + # prefixes for 'hh' and 'll'. 
+ length = ( + 'h' %length_h | + 'l' %length_l | + 'hh' @length_hh | + 'll' @length_ll )?; + + action conversion { + do_conv( fsm, fc ); + } + + conversion = [diouxXcsp] @conversion; + + fmt_spec = + '%' @clear + flags + width + precision + length + conversion; + + action emit { + if ( fsm->buflen == BUFLEN ) { + fsm->write( fsm->buf, fsm->buflen ); + fsm->buflen = 0; + } + fsm->buf[fsm->buflen++] = fc; + } + + action finish_ok { + if ( fsm->buflen > 0 ) + fsm->write( fsm->buf, fsm->buflen ); + } + action finish_err { + printf("EOF IN FORMAT\n"); + } + action err_char { + printf("ERROR ON CHAR: 0x%x\n", fc ); + } + + main := ( + [^%] @emit | + '%%' @emit | + fmt_spec + )* @/finish_err %/finish_ok $!err_char; +}%% + +%% write data; + +void format_init( struct format *fsm ) +{ + fsm->buflen = 0; + %% write init; +} + +void format_execute( struct format *fsm, const char *data, int len ) +{ + const char *p = data; + const char *pe = data + len; + + %% write exec; +} + +int format_finish( struct format *fsm ) +{ + %% write eof; + + if ( fsm->cs == format_error ) + return -1; + if ( fsm->cs >= format_first_final ) + return 1; + return 0; +} + + +#define INPUT_BUFSIZE 2048 + +struct format fsm; +char buf[INPUT_BUFSIZE]; + +void write(char *data, int len ) +{ + fwrite( data, 1, len, stdout ); +} + +int main() +{ + fsm.write = write; + format_init( &fsm ); + while ( 1 ) { + int len = fread( buf, 1, INPUT_BUFSIZE, stdin ); + format_execute( &fsm, buf, len ); + if ( len != INPUT_BUFSIZE ) + break; + } + if ( format_finish( &fsm ) <= 0 ) + printf("FAIL\n"); + return 0; +} + diff --git a/examples/gotocallret/Makefile b/examples/gotocallret/Makefile new file mode 100644 index 0000000..13f9818 --- /dev/null +++ b/examples/gotocallret/Makefile @@ -0,0 +1,21 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: gotocallret + +ps: gotocallret.ps + +gotocallret: gotocallret.o + g++ -g -o gotocallret gotocallret.o + +gotocallret.cpp: gotocallret.rl $(RAGEL) 
$(RLCODEGEN) + $(RAGEL) gotocallret.rl | $(RLCODEGEN) -G2 -o gotocallret.cpp + +gotocallret.o: gotocallret.cpp + g++ -Wall -g -c -O3 -o $@ $< + +gotocallret.ps: gotocallret.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) gotocallret.rl | $(RLCODEGEN) -V | dot -Tps > gotocallret.ps + +distclean clean: + rm -Rf *.o gotocallret.cpp gotocallret gotocallret.ps diff --git a/examples/gotocallret/gotocallret.rl b/examples/gotocallret/gotocallret.rl new file mode 100644 index 0000000..84384a9 --- /dev/null +++ b/examples/gotocallret/gotocallret.rl @@ -0,0 +1,103 @@ +/* + * Demonstrate the use of goto, call and return. This machine expects either a + * lower case char or a digit as a command then a space followed by the command + * arg. If the command is a char, then the arg must be a string of chars. + * If the command is a digit, then the arg must be a string of digits. This + * choice is determined by action code, rather than through transition + * destinations. + */ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> + +using namespace std; + +struct GotoCallRet +{ + char comm; + int cs, top, stack[32]; + + int init( ); + int execute( const char *data, int len ); + int finish( ); +}; + +%%{ + machine GotoCallRet; + + # Error machine, consumes to end of + # line, then starts the main line over. + garble_line := ( + (any-'\n')*'\n' + ) >{cout << "error: garbling line" << endl;} @{fgoto main;}; + + # Look for a string of alphas or of digits, + # on anything else, hold the character and return. + alp_comm := alpha+ $!{fhold;fret;}; + dig_comm := digit+ $!{fhold;fret;}; + + # Choose which machine to call into based on the command. + action comm_arg { + if ( comm >= 'a' ) + fcall alp_comm; + else + fcall dig_comm; + } + + # Specifies command string. Note that the arg is left out. + command = ( + [a-z0-9] @{comm = fc;} ' ' @comm_arg '\n' + ) @{cout << "correct command" << endl;}; + + # Any number of commands. If there is an + # error anywhere, garble the line.
+ main := command* $!{fhold;fgoto garble_line;}; +}%% + +%% write data; + +int GotoCallRet::init( ) +{ + %% write init; + return 1; +} + +int GotoCallRet::execute( const char *data, int len ) +{ + const char *p = data; + const char *pe = data + len; + + %% write exec; + if ( cs == GotoCallRet_error ) + return -1; + if ( cs >= GotoCallRet_first_final ) + return 1; + return 0; +} + +int GotoCallRet::finish( ) +{ + %% write eof; + if ( cs == GotoCallRet_error ) + return -1; + if ( cs >= GotoCallRet_first_final ) + return 1; + return 0; +} + +#define BUFSIZE 1024 + +int main() +{ + char buf[BUFSIZE]; + + GotoCallRet gcr; + gcr.init(); + while ( fgets( buf, sizeof(buf), stdin ) != 0 ) { + gcr.execute( buf, strlen(buf) ); + } + if ( gcr.finish() <= 0 ) + cerr << "gotocallret: error: parsing input" << endl; + return 0; +} diff --git a/examples/mailbox/Makefile b/examples/mailbox/Makefile new file mode 100644 index 0000000..94d6680 --- /dev/null +++ b/examples/mailbox/Makefile @@ -0,0 +1,16 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: mailbox + +mailbox: mailbox.o + g++ -g -o mailbox mailbox.o + +mailbox.cpp: mailbox.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) mailbox.rl | $(RLCODEGEN) -G2 -o mailbox.cpp + +%.o: %.cpp + g++ -Wall -g -c -O3 -o $@ $< + +distclean clean: + rm -Rf *.o mailbox.cpp mailbox mailbox.ps diff --git a/examples/mailbox/mailbox.rl b/examples/mailbox/mailbox.rl new file mode 100644 index 0000000..74e3310 --- /dev/null +++ b/examples/mailbox/mailbox.rl @@ -0,0 +1,206 @@ +/* + * Parses unix mail boxes into headers and bodies. + */ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> + +using namespace std; + +#define BUFSIZE 2048 + +/* A growable buffer for collecting headers. 
*/ +struct Buffer +{ + Buffer() : data(0), allocated(0), length(0) { } + ~Buffer() { empty(); } + + void append( char p ) { + if ( ++length > allocated ) + upAllocate( length*2 ); + data[length-1] = p; + } + + void clear() { length = 0; } + void upAllocate( int len ); + void empty(); + + char *data; + int allocated; + int length; +}; + + +struct MailboxScanner +{ + Buffer headName; + Buffer headContent; + + int cs, top, stack[1]; + + int init( ); + int execute( const char *data, int len ); + int finish( ); +}; + +%%{ + machine MailboxScanner; + + # Buffer the header names. + action bufHeadName { headName.append(fc); } + + # Prints a blank line after the end of the headers of each message. + action blankLine { cout << endl; } + + # Helpers we will use in matching the date section of the from line. + day = /[A-Z][a-z][a-z]/; + month = /[A-Z][a-z][a-z]/; + year = /[0-9][0-9][0-9][0-9]/; + time = /[0-9][0-9]:[0-9][0-9]/ . ( /:[0-9][0-9]/ | '' ); + letterZone = /[A-Z][A-Z][A-Z]/; + numZone = /[+\-][0-9][0-9][0-9][0-9]/; + zone = letterZone | numZone; + dayNum = /[0-9 ][0-9]/; + + # These are the different formats of the date minus an obscure + # type that has a funny string 'remote from xxx' on the end. Taken + # from c-client in the imap-2000 distribution. + date = day . ' ' . month . ' ' . dayNum . ' ' . time . ' ' . + ( year | year . ' ' . zone | zone . ' ' . year ); + + # From lines separate messages. We will exclude fromLine from a message + # body line. This will cause us to stay in message line up until an + # entirely correct from line is matched. + fromLine = 'From ' . (any-'\n')* . ' ' . date . '\n'; + + # The types of characters that can be used as a header name. + hchar = print - [ :]; + + # Simply eat up an uninteresting header. Return at the first non-ws + # character following a newline. 
+ consumeHeader := ( + [^\n] | + '\n' [ \t] | + '\n' [^ \t] @{fhold; fret;} + )*; + + action hchar {headContent.append(fc);} + action hspace {headContent.append(' ');} + + action hfinish { + headContent.append(0); + cout << headContent.data << endl; + headContent.clear(); + fhold; + fret; + } + + # Display the contents of a header as it is consumed. Collapses line + # continuations to a single space. + printHeader := ( + [^\n] @hchar | + ( '\n' ( [ \t]+ '\n' )* [ \t]+ ) %hspace + )** $!hfinish; + + action onHeader + { + headName.append(0); + if ( strcmp( headName.data, "From" ) == 0 || + strcmp( headName.data, "To" ) == 0 || + strcmp( headName.data, "Subject" ) == 0 ) + { + /* Print the header name, then jump to a machine that will display + * the contents. */ + cout << headName.data << ":"; + headName.clear(); + fcall printHeader; + } + + headName.clear(); + fcall consumeHeader; + } + + header = hchar+ $bufHeadName ':' @onHeader; + + # Exclude fromLine from a messageLine, otherwise when encountering a + # fromLine we will be simultaneously matching the old message and a new + # message. + messageLine = ( [^\n]* '\n' - fromLine ); + + # An entire message. + message = ( fromLine . header* . '\n' @blankLine . messageLine* ); + + # File is a series of messages.
+ main := message*; +}%% + +%% write data; + +int MailboxScanner::init( ) +{ + %% write init; + return 1; +} + +int MailboxScanner::execute( const char *data, int len ) +{ + const char *p = data; + const char *pe = data + len; + + %% write exec; + + if ( cs == MailboxScanner_error ) + return -1; + if ( cs >= MailboxScanner_first_final ) + return 1; + return 0; +} + +int MailboxScanner::finish( ) +{ + %% write eof; + if ( cs == MailboxScanner_error ) + return -1; + if ( cs >= MailboxScanner_first_final ) + return 1; + return 0; +} + + +void Buffer::empty() +{ + if ( data != 0 ) { + free( data ); + + data = 0; + length = 0; + allocated = 0; + } +} + +void Buffer::upAllocate( int len ) +{ + if ( data == 0 ) + data = (char*) malloc( len ); + else + data = (char*) realloc( data, len ); + allocated = len; +} + +MailboxScanner mailbox; +char buf[BUFSIZE]; + +int main() +{ + mailbox.init(); + while ( 1 ) { + int len = fread( buf, 1, BUFSIZE, stdin ); + mailbox.execute( buf, len ); + if ( len != BUFSIZE ) + break; + } + if ( mailbox.finish() <= 0 ) + cerr << "mailbox: error parsing input" << endl; + return 0; +} diff --git a/examples/params/Makefile b/examples/params/Makefile new file mode 100644 index 0000000..98b950c --- /dev/null +++ b/examples/params/Makefile @@ -0,0 +1,21 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: params + +ps: params.ps + +params: params.o + gcc -g -o params params.o + +params.c: params.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) params.rl | $(RLCODEGEN) -G2 -o params.c + +params.ps: params.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) params.rl | $(RLCODEGEN) -V | dot -Tps > params.ps + +%.o: %.c + gcc -Wall -O3 -g -c -o $@ $< + +distclean clean: + rm -Rf *.o params.c params params.ps diff --git a/examples/params/params.rl b/examples/params/params.rl new file mode 100644 index 0000000..3cf908f --- /dev/null +++ b/examples/params/params.rl @@ -0,0 +1,104 @@ +/* + * Parse command line arguments. 
+ */ + +#include <stdio.h> +#include <string.h> + +#define BUFLEN 1024 + +struct params +{ + char buffer[BUFLEN+1]; + int buflen; + int cs; +}; + +%%{ + machine params; + access fsm->; + + # A buffer to collect arguments + + # Append to the buffer. + action append { + if ( fsm->buflen < BUFLEN ) + fsm->buffer[fsm->buflen++] = fc; + } + + # Terminate a buffer. + action term { + if ( fsm->buflen < BUFLEN ) + fsm->buffer[fsm->buflen++] = 0; + } + + # Clear out the buffer + action clear { fsm->buflen = 0; } + + action help { printf("help\n"); } + action version { printf("version\n"); } + action output { printf("output: \"%s\"\n", fsm->buffer); } + action spec { printf("spec: \"%s\"\n", fsm->buffer); } + action mach { printf("machine: \"%s\"\n", fsm->buffer); } + + # Helpers that collect strings + string = [^\0]+ >clear $append %term; + + # Different arguments. + help = ( '-h' | '-H' | '-?' | '--help' ) 0 @help; + version = ( '-v' | '--version' ) 0 @version; + output = '-o' 0? string 0 @output; + spec = '-S' 0? string 0 @spec; + mach = '-M' 0?
string 0 @mach; + + main := ( + help | + version | + output | + spec | + mach + )*; +}%% + +%% write data; + +void params_init( struct params *fsm ) +{ + fsm->buflen = 0; + %% write init; +} + +void params_execute( struct params *fsm, const char *data, int len ) +{ + const char *p = data; + const char *pe = data + len; + + %% write exec; +} + +int params_finish( struct params *fsm ) +{ + %% write eof; + + if ( fsm->cs == params_error ) + return -1; + if ( fsm->cs >= params_first_final ) + return 1; + return 0; +} + +#define BUFSIZE 2048 + +int main( int argc, char **argv ) +{ + int a; + struct params params; + + params_init( &params ); + for ( a = 1; a < argc; a++ ) + params_execute( &params, argv[a], strlen(argv[a])+1 ); + if ( params_finish( &params ) != 1 ) + fprintf( stderr, "params: error processing arguments\n" ); + + return 0; +} diff --git a/examples/pullscan/Makefile b/examples/pullscan/Makefile new file mode 100644 index 0000000..1a048ea --- /dev/null +++ b/examples/pullscan/Makefile @@ -0,0 +1,23 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +CFLAGS = -Wall -g -O3 + +all: pullscan + +ps: pullscan.ps + +pullscan: pullscan.o + g++ -g -o $@ $< + +pullscan.c: pullscan.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) pullscan.rl | $(RLCODEGEN) -G2 -o $@ + +%.o: %.c + gcc $(CFLAGS) -c -o $@ $< + +pullscan.ps: pullscan.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) pullscan.rl | $(RLCODEGEN) -V | dot -Tps > pullscan.ps + +distclean clean: + rm -Rf *.o pullscan.c pullscan pullscan.ps diff --git a/examples/pullscan/pullscan.rl b/examples/pullscan/pullscan.rl new file mode 100644 index 0000000..79e3c49 --- /dev/null +++ b/examples/pullscan/pullscan.rl @@ -0,0 +1,166 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define BUFSIZE 4096 + +typedef struct _Scanner { + /* Scanner state.
*/ + int cs; + int act; + int have; + int curline; + char *tokstart; + char *tokend; + char *p; + char *pe; + FILE *file; + int done; + + /* Token data */ + char *data; + int len; + int value; + + char buf[BUFSIZE]; +} Scanner; + + +void scan_init( Scanner *s, FILE *file ) +{ + memset (s, '\0', sizeof(Scanner)); + s->curline = 1; + s->file = file; +} + +#define TK_NO_TOKEN (-1) +#define TK_ERR 128 +#define TK_EOF 129 +#define TK_Identifier 130 +#define TK_Number 131 + + +%%{ + machine Scanner; + write data; +}%% + +#define ret_tok( _tok ) token = _tok; s->data = s->tokstart + +int scan( Scanner *s ) +{ + char *p = s->p; + char *pe = s->pe; + int token = TK_NO_TOKEN; + int space, readlen; + + while ( 1 ) { + if ( p == pe ) { + printf("scanner: need more data\n"); + + if ( s->tokstart == 0 ) + s->have = 0; + else { + /* There is data that needs to be shifted over. */ + printf("scanner: buffer broken mid token\n"); + s->have = pe - s->tokstart; + memmove( s->buf, s->tokstart, s->have ); + s->tokend -= (s->tokstart-s->buf); + s->tokstart = s->buf; + } + + p = s->buf + s->have; + space = BUFSIZE - s->have; + + if ( space == 0 ) { + /* We filled up the buffer trying to scan a token. */ + printf("scanner: out of buffer space\n"); + return TK_ERR; + } + + if ( s->done ) { + printf("scanner: end of file\n"); + p[0] = 0; + readlen = 1; + } + else { + readlen = fread( p, 1, space, s->file ); + if ( readlen < space ) + s->done = 1; + } + + pe = p + readlen; + } + + %%{ + machine Scanner; + access s->; + + main := |* + + # Identifiers + ( [a-zA-Z_] [a-zA-Z0-9_]* ) => + { ret_tok( TK_Identifier ); fbreak; }; + + # Whitespace + [ \t\n]; + + # Number + digit+ => + { ret_tok( TK_Number ); fbreak; }; + + # EOF + 0 => + { ret_tok( TK_EOF ); fbreak; }; + + # Anything else + any => + { ret_tok( *p ); fbreak; }; + + *|; + + write exec; + }%% + + if ( s->cs == Scanner_error ) + return TK_ERR; + + if ( token != TK_NO_TOKEN ) { + /* Save p and pe. fbreak does not advance p. 
*/ + s->p = p + 1; + s->pe = pe; + s->len = s->p - s->data; + return token; + } + } +} + + +int main (int argc, char** argv) +{ + Scanner ss; + int tok; + + scan_init(&ss, stdin); + + while ( 1 ) { + tok = scan (&ss); + if ( tok == TK_EOF ) { + printf ("parser: EOF\n"); + break; + } + else if ( tok == TK_ERR ) { + printf ("parser: ERR\n"); + break; + } + else { + printf ("parser: %d \"", tok); + fwrite ( ss.data, 1, ss.len, stdout ); + printf ("\"\n" ); + } + } + + return 0; +} + + diff --git a/examples/rlscan/Makefile b/examples/rlscan/Makefile new file mode 100644 index 0000000..2021d27 --- /dev/null +++ b/examples/rlscan/Makefile @@ -0,0 +1,21 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: rlscan + +ps: rlscan.ps + +rlscan: rlscan.o + g++ -g -o rlscan rlscan.o + +rlscan.cpp: rlscan.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) rlscan.rl | $(RLCODEGEN) -G2 -o rlscan.cpp + +%.o: %.cpp + g++ -Wall -g -c -O3 -o $@ $< + +rlscan.ps: rlscan.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) rlscan.rl | $(RLCODEGEN) -V | dot -Tps > rlscan.ps + +distclean clean: + rm -Rf *.o rlscan.cpp rlscan rlscan.ps diff --git a/examples/rlscan/rlscan.rl b/examples/rlscan/rlscan.rl new file mode 100644 index 0000000..f912b8d --- /dev/null +++ b/examples/rlscan/rlscan.rl @@ -0,0 +1,298 @@ +/* + * Lexes Ragel input files. 
+ */ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> + +using namespace std; + +void escapeXML( char *data ) +{ + while ( *data != 0 ) { + switch ( *data ) { + case '<': cout << "&lt;"; break; + case '>': cout << "&gt;"; break; + case '&': cout << "&amp;"; break; + default: cout << *data; break; + } + data += 1; + } +} + +void escapeXML( char c ) +{ + switch ( c ) { + case '<': cout << "&lt;"; break; + case '>': cout << "&gt;"; break; + case '&': cout << "&amp;"; break; + default: cout << c; break; + } +} + +void escapeXML( char *data, int len ) +{ + for ( char *end = data + len; data != end; data++ ) { + switch ( *data ) { + case '<': cout << "&lt;"; break; + case '>': cout << "&gt;"; break; + case '&': cout << "&amp;"; break; + default: cout << *data; break; + } + } +} + +inline void write( char *data ) +{ + cout << data; +} + +inline void write( char c ) +{ + cout << c; +} + +inline void write( char *data, int len ) +{ + cout.write( data, len ); +} + + +%%{ + machine RagelScan; + + word = [a-zA-Z_][a-zA-Z_0-9]*; + integer = [0-9]+; + hex = '0x' [0-9a-fA-F] [0-9a-fA-F]*; + + default = ^0; + EOF = 0; + + # Handles comments in outside code and inline blocks. + c_comment := + ( default* :>> '*/' ) + ${ escapeXML( fc ); } + @{ fret; }; + + action emit { + escapeXML( tokstart, tokend-tokstart ); + } + + # + # Inline action code + # + + ilscan := |* + + "'" ( [^'\\] | /\\./ )* "'" => emit; + '"' ( [^"\\] | /\\./ )* '"' => emit; + '/*' { + write( "/*" ); + fcall c_comment; + }; + '//' [^\n]* '\n' => emit; + + '{' { + write( '{' ); + inline_depth += 1; + }; + + '}' { + write( '}' ); + /* If dropping down to the last } then return + * to ragel code.
*/ + if ( --inline_depth == 0 ) { + write( "</inline>\n" ); + fgoto rlscan; + } + }; + + default => { escapeXML( *tokstart ); }; + *|; + + # + # Ragel Tokens + # + + rlscan := |* + '}%%' { + if ( !single_line ) { + write( "</section>\n" ); + fgoto main; + } + }; + + '\n' { + if ( single_line ) { + write( "</section>\n" ); + fgoto main; + } + }; + + # Word + word { + write( "<word>" ); + write( tokstart, tokend-tokstart ); + write( "</word>\n" ); + }; + + # Decimal integer. + integer { + write( "<int>" ); + write( tokstart, tokend-tokstart ); + write( "</int>\n" ); + }; + + # Hexidecimal integer. + hex { + write( "<hex>" ); + write( tokstart, tokend-tokstart ); + write( "</hex>\n" ); + }; + + # Consume comments. + '#' [^\n]* '\n'; + + # Single literal string. + "'" ( [^'\\] | /\\./ )* "'" { + write( "<single_lit>" ); + escapeXML( tokstart, tokend-tokstart ); + write( "</single_lit>\n" ); + }; + + # Double literal string. + '"' ( [^"\\] | /\\./ )* '"' { + write( "<double_lit>" ); + escapeXML( tokstart, tokend-tokstart ); + write( "</double_lit>\n" ); + }; + + # Or literal. + '[' ( [^\]\\] | /\\./ )* ']' { + write( "<or_lit>" ); + escapeXML( tokstart, tokend-tokstart ); + write( "</or_lit>\n" ); + }; + + # Regex Literal. + '/' ( [^/\\] | /\\./ ) * '/' { + write( "<re_lit>" ); + escapeXML( tokstart, tokend-tokstart ); + write( "</re_lit>\n" ); + }; + + # Open an inline block + '{' { + inline_depth = 1; + write( "<inline>{" ); + fgoto ilscan; + }; + + punct { + write( "<symbol>" ); + escapeXML( fc ); + write( "</symbol>\n" ); + }; + + default; + *|; + + # + # Outside code. 
+ # + + main := |* + + "'" ( [^'\\] | /\\./ )* "'" => emit; + '"' ( [^"\\] | /\\./ )* '"' => emit; + + '/*' { + escapeXML( tokstart, tokend-tokstart ); + fcall c_comment; + }; + + '//' [^\n]* '\n' => emit; + + '%%{' { + write( "<section>\n" ); + single_line = false; + fgoto rlscan; + }; + + '%%' { + write( "<section>\n" ); + single_line = true; + fgoto rlscan; + }; + + default { + escapeXML( *tokstart ); + }; + + # EOF. + EOF; + *|; +}%% + +%% write data nofinal; + +#define BUFSIZE 2048 + +int main() +{ + std::ios::sync_with_stdio(false); + + int cs, act; + char *tokstart, *tokend; + int stack[1], top; + + static char inbuf[BUFSIZE]; + bool single_line = false; + int inline_depth = 0; + + %% write init; + + bool done = false; + int have = 0; + while ( !done ) { + /* How much space is in the buffer? */ + int space = BUFSIZE - have; + if ( space == 0 ) { + /* Buffer is full. */ + cerr << "TOKEN TOO BIG" << endl; + exit(1); + } + + /* Read in a block. */ + char *p = inbuf + have; + cin.read( p, space ); + int len = cin.gcount(); + + /* Check for EOF. */ + if ( len == 0 ) { + p[0] = 0, len++; + done = true; + } + + char *pe = p + len; + %% write exec; + + if ( cs == RagelScan_error ) { + /* Machine failed before finding a token. */ + cerr << "PARSE ERROR" << endl; + exit(1); + } + + if ( tokstart == 0 ) + have = 0; + else { + /* There is a prefix to preserve, shift it over. 
*/ + have = pe - tokstart; + memmove( inbuf, tokstart, have ); + tokend = inbuf + (tokend-tokstart); + tokstart = inbuf; + } + } + return 0; +} diff --git a/examples/statechart/Makefile b/examples/statechart/Makefile new file mode 100644 index 0000000..3dec7fd --- /dev/null +++ b/examples/statechart/Makefile @@ -0,0 +1,21 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: statechart + +ps: statechart.ps + +statechart: statechart.o + g++ -g -o statechart statechart.o + +statechart.cpp: statechart.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) statechart.rl | $(RLCODEGEN) -G2 -o statechart.cpp + +statechart.o: statechart.cpp + g++ -Wall -g -c -O3 -o $@ $< + +statechart.ps: statechart.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) statechart.rl | $(RLCODEGEN) -V | dot -Tps > statechart.ps + +distclean clean: + rm -Rf *.o statechart.cpp statechart statechart.ps diff --git a/examples/statechart/statechart.rl b/examples/statechart/statechart.rl new file mode 100644 index 0000000..cb99a20 --- /dev/null +++ b/examples/statechart/statechart.rl @@ -0,0 +1,114 @@ +/* + * Demonstrate the use of labels, the epsilon operator, and the join operator + * for creating machines using the named state and transition list paradigm. + * This implements the same machine as the atoi example. + */ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> + +using namespace std; + +struct StateChart +{ + bool neg; + int val; + int cs; + + int init( ); + int execute( const char *data, int len ); + int finish( ); +}; + +%%{ + machine StateChart; + + action begin { + neg = false; + val = 0; + } + + action see_neg { + neg = true; + } + + action add_digit { + val = val * 10 + (fc - '0'); + } + + action finish { + if ( neg ) + val = -1 * val; + } + + atoi = ( + start: ( + '-' @see_neg ->om_num | + '+' ->om_num | + [0-9] @add_digit ->more_nums + ), + + # One or more nums. + om_num: ( + [0-9] @add_digit ->more_nums + ), + + # Zero or more nums.
+ more_nums: ( + [0-9] @add_digit ->more_nums | + '' -> final + ) + ) >begin %finish; + + main := ( atoi '\n' @{ cout << val << endl; } )*; +}%% + +%% write data; + +int StateChart::init( ) +{ + %% write init; + return 1; +} + +int StateChart::execute( const char *data, int len ) +{ + const char *p = data; + const char *pe = data + len; + + %% write exec; + + if ( cs == StateChart_error ) + return -1; + if ( cs >= StateChart_first_final ) + return 1; + return 0; +} + +int StateChart::finish( ) +{ + %% write eof; + if ( cs == StateChart_error ) + return -1; + if ( cs >= StateChart_first_final ) + return 1; + return 0; +} + + +#define BUFSIZE 1024 + +int main() +{ + char buf[BUFSIZE]; + + StateChart atoi; + atoi.init(); + while ( fgets( buf, sizeof(buf), stdin ) != 0 ) { + atoi.execute( buf, strlen(buf) ); + } + if ( atoi.finish() <= 0 ) + cerr << "statechart: error: parsing input" << endl; + return 0; +} diff --git a/examples/uri/uri.rl b/examples/uri/uri.rl new file mode 100644 index 0000000..185a76c --- /dev/null +++ b/examples/uri/uri.rl @@ -0,0 +1,31 @@ +%%{ + machine uri; + + action scheme {} + action loc {} + action item {} + action query {} + action last {} + action nothing {} + + main := + # Scheme machine. This is ambiguous with the item machine. We commit + # to the scheme machine on colon. + ( [^:/?#]+ ':' @(colon,1) @scheme )? + + # Location machine. This is ambiguous with the item machine. We remain + # ambiguous until a second slash, at that point and all points after + # we place a higher priority on staying in the location machine over + # moving into the item machine. + ( ( '/' ( '/' [^/?#]* ) $(loc,1) ) %loc %/loc )? + + # Item machine. Ambiguous with both scheme and location, which both + # get a higher priority on the characters causing ambiguity. + ( ( [^?#]+ ) $(loc,0) $(colon,0) %item %/item )? 
+ + # Last two components, the characters that initiate these machines are + # not supported in any previous components, therefore there are no + # ambiguities introduced by these parts. + ( '?' [^#]* %query %/query)? + ( '#' any* %/last )?; +}%% diff --git a/ragel.spec b/ragel.spec new file mode 100644 index 0000000..0dc97a7 --- /dev/null +++ b/ragel.spec @@ -0,0 +1,55 @@ +Summary: Ragel State Machine Compiler +Name: ragel +Version: 5.16 +Release: 1 + +URL: http://www.cs.queensu.ca/home/thurston/ragel/ +Vendor: Adrian Thurston +Packager: Adrian Thurston +Distribution: Any +Group: Development/Other +License: GPL + +Source0: http://www.cs.queensu.ca/home/thurston/ragel/%{name}-%{version}.tar.gz + +Prefix: /usr +BuildRoot: %_tmppath/%name-%version-root +BuildPreReq: gcc, make + +%description +Ragel compiles finite state machines from regular languages into executable C, +C++, Objective-C or D code. Ragel state machines can not only recognize byte +sequences as regular expression machines do, but can also execute code at +arbitrary points in the recognition of a regular language. Using custom +operators, Ragel allows the user to embed code into a regular language in +arbitrary places without disrupting the regular language syntax. Ragel also +provides operators for controlling nondeterminism, constructing machines using +state charts and building scanners. 
+ +%prep +%setup -q -n %{name}-%{version} + +%build +./configure --prefix=%{prefix} +make CFLAGS="-O2 -Wall" +cd doc && make ragel.1 rlcodegen.1 + +%install +# Rather than 'make install', let RPM choose where +# things are kept on this system: +install -d $RPM_BUILD_ROOT%_bindir +install -s ragel/ragel $RPM_BUILD_ROOT%_bindir/ragel +install -s rlcodegen/rlcodegen $RPM_BUILD_ROOT%_bindir/rlcodegen +install -d $RPM_BUILD_ROOT%_mandir/man1 +install doc/ragel.1 $RPM_BUILD_ROOT%_mandir/man1/ragel.1 +install doc/rlcodegen.1 $RPM_BUILD_ROOT%_mandir/man1/rlcodegen.1 + +%files +%defattr(-,root,root) +%_bindir/ragel +%_bindir/rlcodegen +%_mandir/man1/ragel.1 +%_mandir/man1/rlcodegen.1 + +%clean + rm -rf $RPM_BUILD_ROOT diff --git a/ragel.vim b/ragel.vim new file mode 100644 index 0000000..4fc9c5b --- /dev/null +++ b/ragel.vim @@ -0,0 +1,161 @@ +" Vim syntax file +" +" Language: Ragel +" Author: Adrian Thurston + +syntax clear + +" +" Outside code +" + +" Comments +syntax region ocComment start="\/\*" end="\*\/" +syntax match ocComment "\/\/.*$" + +" Anything preprocessor +syntax match ocPreproc "#.*$" + +" Strings +syntax match ocLiteral "'\(\\.\|[^'\\]\)*'" +syntax match ocLiteral "\"\(\\.\|[^\"\\]\)*\"" + +" C/C++ Keywords +syntax keyword ocType unsigned signed void char short int long float double bool +syntax keyword ocType inline static extern register const volatile auto +syntax keyword ocType union enum struct class typedef +syntax keyword ocType namespace template typename mutable +syntax keyword ocKeyword break continue default do else for +syntax keyword ocKeyword goto if return switch while +syntax keyword ocKeyword new delete this using friend public private protected sizeof +syntax keyword ocKeyword throw try catch operator typeid +syntax keyword ocKeyword and bitor xor compl bitand and_eq or_eq xor_eq not not_eq +syntax keyword ocKeyword static_cast dynamic_cast + +" D Keywords +syntax keyword ocType wchar dchar bit byte ubyte ushort uint ulong cent ucent 
+syntax keyword ocType cfloat ifloat cdouble idouble real creal ireal +syntax keyword ocKeyword abstract alias align asm assert body cast debug delegate +syntax keyword ocKeyword deprecated export final finally foreach function import in inout +syntax keyword ocKeyword interface invariant is mixin module out override package pragma +syntax keyword ocKeyword super synchronized typeof unittest version with + +" Java Keywords +syntax keyword ocType byte short char int + +" Objective-C Directives +syntax match ocKeyword "@public\|@private\|@protected" +syntax match ocKeyword "@interface\|@implementation" +syntax match ocKeyword "@class\|@end\|@defs" +syntax match ocKeyword "@encode\|@protocol\|@selector" + +" Numbers +syntax match ocNumber "[0-9][0-9]*" +syntax match ocNumber "0x[0-9a-fA-F][0-9a-fA-F]*" + +" Booleans +syntax keyword ocBoolean true false + +" Identifiers +syntax match anyId "[a-zA-Z_][a-zA-Z_0-9]*" + +" Inline code only +syntax keyword fsmType fpc fc fcurs fbuf fblen ftargs fstack +syntax keyword fsmKeyword fhold fgoto fcall fret fentry fnext fexec fbreak + +syntax cluster rlItems contains=rlComment,rlLiteral,rlAugmentOps,rlOtherOps,rlKeywords,rlWrite,rlCodeCurly,rlCodeSemi,rlNumber,anyId,rlLabelColon,rlExprKeywords + +syntax region machineSpec1 matchgroup=beginRL start="%%{" end="}%%" contains=@rlItems +syntax region machineSpec2 matchgroup=beginRL start="%%[^{]"rs=e-1 end="$" keepend contains=@rlItems +syntax region machineSpec2 matchgroup=beginRL start="%%$" end="$" keepend contains=@rlItems + +" Comments +syntax match rlComment "#.*$" contained + +" Literals +syntax match rlLiteral "'\(\\.\|[^'\\]\)*'[i]*" contained +syntax match rlLiteral "\"\(\\.\|[^\"\\]\)*\"[i]*" contained +syntax match rlLiteral /\/\(\\.\|[^\/\\]\)*\/[i]*/ contained +syntax match rlLiteral "\[\(\\.\|[^\]\\]\)*\]" contained + +" Numbers +syntax match rlNumber "[0-9][0-9]*" contained +syntax match rlNumber "0x[0-9a-fA-F][0-9a-fA-F]*" contained + +" Operators +syntax match 
rlAugmentOps "[>$%@]" contained +syntax match rlAugmentOps "<>\|<" contained +syntax match rlAugmentOps "[>\<$%@][!\^/*~]" contained +syntax match rlAugmentOps "[>$%]?" contained +syntax match rlAugmentOps "<>[!\^/*~]" contained +syntax match rlAugmentOps "=>" contained +syntax match rlOtherOps "->" contained + +syntax match rlOtherOps ":>" contained +syntax match rlOtherOps ":>>" contained +syntax match rlOtherOps "<:" contained + +" Keywords +syntax keyword rlKeywords machine action context include range contained +syntax keyword rlExprKeywords when err lerr eof from to contained + +" Case Labels +syntax keyword caseLabelKeyword case contained +syntax cluster caseLabelItems contains=ocComment,ocPreproc,ocLiteral,ocType,ocKeyword,caseLabelKeyword,ocNumber,ocBoolean,anyId,fsmType,fsmKeyword +syntax match caseLabelColon "case" contains=@caseLabelItems +syntax match caseLabelColon "case[\t ]\+.*:$" contains=@caseLabelItems +syntax match caseLabelColon "case[\t ]\+.*:[^=:]"me=e-1 contains=@caseLabelItems + +" Labels +syntax match ocLabelColon "^[\t ]*[a-zA-Z_][a-zA-Z_0-9]*[ \t]*:$" contains=anyLabel +syntax match ocLabelColon "^[\t ]*[a-zA-Z_][a-zA-Z_0-9]*[ \t]*:[^=:]"me=e-1 contains=anyLabel + +syntax match rlLabelColon "[a-zA-Z_][a-zA-Z_0-9]*[ \t]*:$" contained contains=anyLabel +syntax match rlLabelColon "[a-zA-Z_][a-zA-Z_0-9]*[ \t]*:[^=:>]"me=e-1 contained contains=anyLabel +syntax match anyLabel "[a-zA-Z_][a-zA-Z_0-9]*" contained + +" All items that can go in a code block. + +syntax cluster inlineItems contains=rlCodeCurly,ocComment,ocPreproc,ocLiteral,ocType,ocKeyword,ocNumber,ocBoolean,ocLabelColon,anyId,fsmType,fsmKeyword,caseLabelColon + +" Blocks of code. rlCodeCurly is recursive. 
+syntax region rlCodeCurly matchgroup=NONE start="{" end="}" contained contains=@inlineItems +syntax region rlCodeSemi matchgroup=Type start="\<alphtype\>" start="\<getkey\>" start="\<access\>" start="\<variable\>" matchgroup=NONE end=";" contained contains=@inlineItems + +syntax region rlWrite matchgroup=Type start="\<write\>" matchgroup=NONE end=";" contained contains=rlWriteKeywords,rlWriteOptions + +syntax keyword rlWriteKeywords init data exec eof contained +syntax keyword rlWriteOptions noerror nofinal noprefix noend contained + +" +" Sync at the start of machine specs. +" +syntax sync match ragelSyncPat grouphere NONE "%%{&" +syntax sync match ragelSyncPat grouphere NONE "%%[^{]&" +syntax sync match ragelSyncPat grouphere NONE "}%%" + +" +" Specifying Groups +" +hi link ocComment Comment +hi link ocPreproc Macro +hi link ocLiteral String +hi link ocType Type +hi link ocKeyword Keyword +hi link ocNumber Number +hi link ocBoolean Boolean +hi link rlComment Comment +hi link rlNumber Number +hi link rlLiteral String +hi link rlAugmentOps Keyword +hi link rlExprKeywords Keyword +hi link rlWriteKeywords Keyword +hi link rlWriteOptions Keyword +hi link rlKeywords Type +hi link fsmType Type +hi link fsmKeyword Keyword +hi link anyLabel Label +hi link caseLabelKeyword Keyword +hi link beginRL Type + +let b:current_syntax = "ragel" diff --git a/ragel/Makefile.in b/ragel/Makefile.in new file mode 100644 index 0000000..9b252a7 --- /dev/null +++ b/ragel/Makefile.in @@ -0,0 +1,85 @@ +# +# Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> +# + +# This file is part of Ragel. +# +# Ragel is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# Ragel is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ragel; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +INCS += -I../common -I../aapl +DEFS += + +CFLAGS += -g -Wall +LDFLAGS += + +CC_SRCS = \ + main.cpp parsetree.cpp parsedata.cpp fsmstate.cpp fsmbase.cpp \ + fsmattach.cpp fsmmin.cpp fsmgraph.cpp fsmap.cpp xmlcodegen.cpp \ + rlscan.cpp rlparse.cpp + +GEN_SRC = rlscan.cpp rlparse.h rlparse.cpp + +LIBS += @LIBS@ +PREFIX = @prefix@ + +BUILD_PARSERS = @BUILD_PARSERS@ + +#************************************* + +# Programs +CXX = @CXX@ + +# Get objects and dependencies from sources. +OBJS = $(CC_SRCS:%.cpp=%.o) +DEPS = $(CC_SRCS:%.cpp=.%.d) + +# Rules. +all: ragel + +ragel: $(GEN_SRC) $(OBJS) + $(CXX) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) + +ifeq ($(BUILD_PARSERS),true) + +rlparse.h: rlparse.kh + kelbt -o $@ $< + +rlparse.cpp: rlparse.kl rlparse.kh + kelbt -o $@ $< + +rlscan.cpp: rlscan.rl + ragel $< | rlcodegen -G2 -o $@ + +endif + +%.o: %.cpp + @$(CXX) -M $(DEFS) $(INCS) $< > .$*.d + $(CXX) -c $(CFLAGS) $(DEFS) $(INCS) -o $@ $< + +distclean: clean + rm -f Makefile + +ifeq ($(BUILD_PARSERS),true) +EXTRA_CLEAN = $(GEN_SRC) +endif + +clean: + rm -f tags .*.d *.o ragel $(EXTRA_CLEAN) + +install: all + install -d $(PREFIX)/bin + install -s ragel $(PREFIX)/bin/ragel + +-include $(DEPS) diff --git a/ragel/fsmap.cpp b/ragel/fsmap.cpp new file mode 100644 index 0000000..551aea0 --- /dev/null +++ b/ragel/fsmap.cpp @@ -0,0 +1,840 @@ +/* + * Copyright 2002-2004 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. 
+ * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "fsmgraph.h" +#include <iostream> +using std::cerr; +using std::endl; + +CondData *condData = 0; +KeyOps *keyOps = 0; + +/* Insert a single action into this action table under the given ordering. */ +void ActionTable::setAction( int ordering, Action *action ) +{ + /* Multi-insert in case specific instances of an action appear in a + * transition more than once. */ + insertMulti( ordering, action ); +} + +/* Copy every action of another action table into this table, keeping the + * key stored with each one. */ +void ActionTable::setActions( const ActionTable &other ) +{ + for ( ActionTable::Iter action = other; action.lte(); action++ ) + insertMulti( action->key, action->value ); +} +/* Insert nActs actions from two parallel arrays, pairing orderings[a] with + * actions[a] for each index a. */ +void ActionTable::setActions( int *orderings, Action **actions, int nActs ) +{ + for ( int a = 0; a < nActs; a++ ) + insertMulti( orderings[a], actions[a] ); +} +/* Return true if any element of this table holds the given action pointer as + * its value, false otherwise. The ordering key is not looked at. 
*/ +bool ActionTable::hasAction( Action *action ) +{ + for ( int a = 0; a < length(); a++ ) { + if ( data[a].value == action ) + return true; + } + return false; +} + +/* Insert a longest match part into this longest match action table under the + * given ordering. */ +void LmActionTable::setAction( int ordering, LongestMatchPart *action ) +{ + /* Multi-insert in case specific instances of an action appear in a + * transition more than once. */ + insertMulti( ordering, action ); +} + +/* Set all the action from another action table in this table. 
*/ +void LmActionTable::setActions( const LmActionTable &other ) +{ + for ( LmActionTable::Iter action = other; action.lte(); action++ ) + insertMulti( action->key, action->value ); +} +/* Insert one error action element built from the action, its ordering and + * its transfer point. */ +void ErrActionTable::setAction( int ordering, Action *action, int transferPoint ) +{ + insertMulti( ErrActionTableEl( action, ordering, transferPoint ) ); +} +/* Copy every element of another error action table into this table. */ +void ErrActionTable::setActions( const ErrActionTable &other ) +{ + for ( ErrActionTable::Iter act = other; act.lte(); act++ ) + insertMulti( ErrActionTableEl( act->action, act->ordering, act->transferPoint ) ); +} + +/* Insert a priority into this priority table. Looks out for priorities on + * duplicate keys. */ +void PriorTable::setPrior( int ordering, PriorDesc *desc ) +{ + PriorEl *lastHit = 0; + PriorEl *insed = insert( PriorEl(ordering, desc), &lastHit ); + if ( insed == 0 ) { + /* This already has a priority on the same key as desc. Overwrite the + * existing element when the new ordering is equal or larger (the same + * point in time or later); a smaller ordering leaves it untouched. */ + if ( ordering >= lastHit->ordering ) + *lastHit = PriorEl( ordering, desc ); + } +} + +/* Set all the priorities from a priorTable in this table. */ +void PriorTable::setPriors( const PriorTable &other ) +{ + /* Pass each source priority through setPrior so that duplicates on the + * same key are resolved by ordering. */ + PriorTable::Iter priorIt = other; + for ( ; priorIt.lte(); priorIt++ ) + setPrior( priorIt->ordering, priorIt->desc ); +} + +/* Set the priority of starting transitions. Isolates the start state so it has + * no other entry points, then sets the priorities of all the transitions out + * of the start state that have a target state. 
+ * NOTE(review): earlier text here said that the outPrior of a final start + * state is also set; the body of this function only writes the priorTable of + * out transitions and never touches an out priority table. */ +void FsmAp::startFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + + /* Walk all transitions out of the start state. 
*/ + for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + trans->priorTable.setPrior( ordering, prior ); + } +} + +/* Set the priority of all transitions in a graph. Walks the out list of every + * state; transitions whose toState is null are skipped. */ +void FsmAp::allTransPrior( int ordering, PriorDesc *prior ) +{ + /* Walk the list of all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the out list of the state, setting the priority only on + * transitions that have a target state. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + trans->priorTable.setPrior( ordering, prior ); + } + } +} + +/* Set the priority of all transitions that go into a final state. Note that if + * any entry states are final, we will not be setting the priority of any + * transitions that may go into those states in the future. The graph does not + * support pending in transitions in the same way pending out transitions are + * supported. */ +void FsmAp::finishFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Walk all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { + /* Walk all in transitions of the final state. */ + for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ ) + trans->priorTable.setPrior( ordering, prior ); + } +} + +/* Set the priority of any future out transitions that may be made going out of + * this state machine. The priority is stored in the outPriorTable of every + * final state rather than on an existing transition. */ +void FsmAp::leaveFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Set priority in all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->outPriorTable.setPrior( ordering, prior ); +} + + +/* Set actions to execute on starting transitions. 
Isolates the start state + * so it has no other entry points, then adds to the transition functions + * of all the transitions out of the start state. If the start state is final, + * then the func is also added to the start state's out func list. 
The idea is + * that a machine that accepts the null string can execute a start func when it + * matches the null word, which can only be done when leaving the start/final + * state. */ +void FsmAp::startFsmAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + + /* Walk the start state's transitions, setting the action on those that + * have a target state. NOTE(review): nothing in this body adds the + * action to an out action table of the start state, whatever the + * header comment says about a final start state. */ + for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + trans->actionTable.setAction( ordering, action ); + } +} + +/* Set functions to execute on all transitions. Walks the out lists of all + * states; transitions whose toState is null are skipped. */ +void FsmAp::allTransAction( int ordering, Action *action ) +{ + /* Walk all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the out list of the state. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + trans->actionTable.setAction( ordering, action ); + } + } +} + +/* Specify functions to execute upon entering final states. If the start state + * is final we can't really specify a function to execute upon entering that + * final state the first time. So function really means whenever entering a + * final state from within the same fsm. */ +void FsmAp::finishFsmAction( int ordering, Action *action ) +{ + /* Walk all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { + /* Walk the final state's in list. 
*/ + for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ ) + trans->actionTable.setAction( ordering, action ); + } +} + +/* Add functions to any future out transitions that may be made going out of + * this state machine. */ +void FsmAp::leaveFsmAction( int ordering, Action *action ) +{ + /* Insert the action in the outActionTable of all final states. 
*/ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->outActionTable.setAction( ordering, action ); +} + +/* Add a longest match part to the longest match action table of every + * transition that goes into a final state. Used for constructing scanners. */ +void FsmAp::longMatchAction( int ordering, LongestMatchPart *lmPart ) +{ + /* Walk all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { + /* Walk the final state's in list. */ + for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ ) + trans->lmActionTable.setAction( ordering, lmPart ); + } +} +/* Make the out list of a state cover every key from keyOps->minKey to + * keyOps->maxKey. Existing transitions are kept in order and each uncovered + * key range gets a new transition attached with 0 as the second argument of + * attachNewTrans (presumably a null target; callers in this file treat + * toState == 0 as going to error). */ +void FsmAp::fillGaps( StateAp *state ) +{ + if ( state->outList.length() == 0 ) { + /* No transitions at all: one new transition spans the whole key range. */ + attachNewTrans( state, 0, keyOps->minKey, keyOps->maxKey ); + } + else { + TransList srcList; + srcList.transfer( state->outList ); + + /* Check for a gap at the beginning. */ + TransList::Iter trans = srcList, next; + if ( keyOps->minKey < trans->lowKey ) { + /* Make the high key and append. */ + Key highKey = trans->lowKey; + highKey.decrement(); + + attachNewTrans( state, 0, keyOps->minKey, highKey ); + } + + /* Remember the successor, then move the first source transition back + * onto the out list. */ + next = trans.next(); + state->outList.append( trans ); + + /* Keep the last high end. */ + Key lastHigh = trans->highKey; + + /* Loop each source range. */ + for ( trans = next; trans.lte(); trans = next ) { + /* Make the next key following the last range. 
*/ + Key nextKey = lastHigh; + nextKey.increment(); + + /* Check for a gap from last up to here. */ + if ( nextKey < trans->lowKey ) { + /* Make the high end of the range that fills the gap. */ + Key highKey = trans->lowKey; + highKey.decrement(); + + attachNewTrans( state, 0, nextKey, highKey ); + } + + /* Remember the successor before appending (presumably because the + * append relinks the element), then move this transition back onto + * the out list unchanged. */ + next = trans.next(); + state->outList.append( trans ); + + /* Keep the last high end. */ + lastHigh = trans->highKey; + } + + /* Now check for a gap on the end to fill. 
*/ + if ( lastHigh < keyOps->maxKey ) { + /* Step past the last covered key to get the low end of the final gap. */ + lastHigh.increment(); + + attachNewTrans( state, 0, lastHigh, keyOps->maxKey ); + } + } +} +/* Add an action to every transition of the state that goes to error (those + * with a null toState), after filling the gaps in its out list so that the + * uncovered key ranges are included. */ +void FsmAp::setErrorAction( StateAp *state, int ordering, Action *action ) +{ + /* Fill any gaps in the out list with an error transition. */ + fillGaps( state ); + + /* Add the action to the transitions that go to error. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState == 0 ) + trans->actionTable.setAction( ordering, action ); + } +} + + +/* Give a target state for error transitions. Every transition of the state + * with a null toState is redirected to target and receives the nActs actions + * given by the parallel orderings and actions arrays. */ +void FsmAp::setErrorTarget( StateAp *state, StateAp *target, int *orderings, + Action **actions, int nActs ) +{ + /* Fill any gaps in the out list with an error transition. */ + fillGaps( state ); + + /* Set error target in the transitions that go to error. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState == 0 ) { + /* The trans goes to error, redirect it. */ + redirectErrorTrans( trans->fromState, target, trans ); + trans->actionTable.setActions( orderings, actions, nActs ); + } + } +} +/* Move the pending error actions of a state that carry the given transfer + * point out of its errActionTable and onto its error transitions. The index + * only advances when nothing was removed, because vremove is called on the + * current position. 
*/ +void FsmAp::transferErrorActions( StateAp *state, int transferPoint ) +{ + for ( int i = 0; i < state->errActionTable.length(); ) { + ErrActionTableEl *act = state->errActionTable.data + i; + if ( act->transferPoint == transferPoint ) { + /* Transfer the error action and remove it. */ + setErrorAction( state, act->ordering, act->action ); + state->errActionTable.vremove( i ); + } + else { + /* Not transferring and deleting, skip over the item. */ + i += 1; + } + } +} + +/* Set error actions in the start state. */ +void FsmAp::startErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + + /* Add the actions. 
*/ + startState->errActionTable.setAction( ordering, action, transferPoint ); +} + +/* Set error actions in every state of the machine. */ +void FsmAp::allErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Insert actions in the error action table of all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->errActionTable.setAction( ordering, action, transferPoint ); +} + +/* Set error actions in final states. */ +void FsmAp::finalErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Add the action to the error table of final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->errActionTable.setAction( ordering, action, transferPoint ); +} +/* Set error actions in every state except the start state. */ +void FsmAp::notStartErrorAction( int ordering, Action *action, int transferPoint ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->errActionTable.setAction( ordering, action, transferPoint ); + } +} +/* Set error actions in every state for which isFinState() is false. */ +void FsmAp::notFinalErrorAction( int ordering, Action *action, int transferPoint ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->errActionTable.setAction( ordering, action, transferPoint ); + } +} + +/* Set error actions in the states that are neither the start state nor + * final. */ +void FsmAp::middleErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Set the action in all states that are not the start state and not + * final. NOTE(review): an earlier comment here spoke of isolating the + * start state, but isolateStartState is not called in this function. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! 
state->isFinState() ) + state->errActionTable.setAction( ordering, action, transferPoint ); + } +} + +/* Set EOF actions in the start state. */ +void FsmAp::startEOFAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. 
*/ + isolateStartState(); + + /* Add the actions. */ + startState->eofActionTable.setAction( ordering, action ); +} + +/* Set EOF actions in every state of the machine. */ +void FsmAp::allEOFAction( int ordering, Action *action ) +{ + /* Insert actions in the EOF action table of all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->eofActionTable.setAction( ordering, action ); +} + +/* Set EOF actions in final states. */ +void FsmAp::finalEOFAction( int ordering, Action *action ) +{ + /* Add the action to the EOF action table of final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->eofActionTable.setAction( ordering, action ); +} +/* Set EOF actions in every state except the start state. */ +void FsmAp::notStartEOFAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->eofActionTable.setAction( ordering, action ); + } +} +/* Set EOF actions in every state for which isFinState() is false. */ +void FsmAp::notFinalEOFAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->eofActionTable.setAction( ordering, action ); + } +} + +/* Set EOF actions in the states that are neither the start state nor final. */ +void FsmAp::middleEOFAction( int ordering, Action *action ) +{ + /* Set the actions in all states that are not the start state and not final. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->eofActionTable.setAction( ordering, action ); + } +} + +/* + * Set To State Actions. + */ + +/* Set to state actions in the start state. */ +void FsmAp::startToStateAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. 
*/ + isolateStartState(); + startState->toStateActionTable.setAction( ordering, action ); +} + +/* Set to state actions in all states. 
*/ +void FsmAp::allToStateAction( int ordering, Action *action ) +{ + /* Insert the action on all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->toStateActionTable.setAction( ordering, action ); +} + +/* Set to state actions in final states. */ +void FsmAp::finalToStateAction( int ordering, Action *action ) +{ + /* Add the action to the to state action table of final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->toStateActionTable.setAction( ordering, action ); +} +/* Set to state actions in every state except the start state. */ +void FsmAp::notStartToStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->toStateActionTable.setAction( ordering, action ); + } +} +/* Set to state actions in every state for which isFinState() is false. */ +void FsmAp::notFinalToStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->toStateActionTable.setAction( ordering, action ); + } +} + +/* Set to state actions in states that are not final and not the start state. */ +void FsmAp::middleToStateAction( int ordering, Action *action ) +{ + /* Set the action in all states that are not the start state and not final. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->toStateActionTable.setAction( ordering, action ); + } +} + +/* + * Set From State Actions. + */ +/* Set from state actions in the start state, after isolating it. */ +void FsmAp::startFromStateAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. 
*/ + isolateStartState(); + startState->fromStateActionTable.setAction( ordering, action ); +} +/* Set from state actions in all states. */ +void FsmAp::allFromStateAction( int ordering, Action *action ) +{ + /* Insert the action on all states. 
*/ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->fromStateActionTable.setAction( ordering, action ); +} +/* Set from state actions in final states. */ +void FsmAp::finalFromStateAction( int ordering, Action *action ) +{ + /* Add the action to the from state action table of final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->fromStateActionTable.setAction( ordering, action ); +} +/* Set from state actions in every state except the start state. */ +void FsmAp::notStartFromStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->fromStateActionTable.setAction( ordering, action ); + } +} +/* Set from state actions in every state for which isFinState() is false. */ +void FsmAp::notFinalFromStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->fromStateActionTable.setAction( ordering, action ); + } +} +/* Set from state actions in states that are not final and not the start + * state. */ +void FsmAp::middleFromStateAction( int ordering, Action *action ) +{ + /* Set the action in all states that are not the start state and not final. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->fromStateActionTable.setAction( ordering, action ); + } +} + +/* Shift the function ordering of the start transitions to start + * at fromOrder and increase in units of 1. Useful before starring + * (presumably the kleene star operation; the original text read staring). + * Returns the largest number of order numbers used by any single transition + * out of the start state. 
*/ +int FsmAp::shiftStartActionOrder( int fromOrder ) +{ + int maxUsed = 0; + + /* Walk the start state's transitions, shifting function ordering. */ + for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { + /* Walk the function data for the transition and set the keys to + * increasing values starting at fromOrder. The counter restarts at + * fromOrder for every transition. */ + int curFromOrder = fromOrder; + ActionTable::Iter action = trans->actionTable; + for ( ; action.lte(); action++ ) + action->key = curFromOrder++; + + /* Keep track of the max number of orders used. 
*/ + if ( curFromOrder - fromOrder > maxUsed ) + maxUsed = curFromOrder - fromOrder; + } + + return maxUsed; +} + +/* Remove all priorities: empties the out priority table of every state and + * the priority table of every out transition. */ +void FsmAp::clearAllPriorities() +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Clear out priority data. */ + state->outPriorTable.empty(); + + /* Clear the priority tables of the out transitions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) + trans->priorTable.empty(); + } +} + +/* Zeros out the function ordering keys. This may be called before minimization + * when it is known that no more fsm operations are going to be done. This + * will achieve greater reduction as states will not be separated on the basis + * of function ordering. */ +void FsmAp::nullActionKeys( ) +{ + /* For each state... */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the transitions for the state. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + /* Walk the action table for the transition. */ + for ( ActionTable::Iter action = trans->actionTable; + action.lte(); action++ ) + action->key = 0; + + /* Walk the longest match action table for the transition. */ + for ( LmActionTable::Iter action = trans->lmActionTable; + action.lte(); action++ ) + action->key = 0; + } + + /* Null the action keys of the to state action table. */ + for ( ActionTable::Iter action = state->toStateActionTable; + action.lte(); action++ ) + action->key = 0; + + /* Null the action keys of the from state action table. */ + for ( ActionTable::Iter action = state->fromStateActionTable; + action.lte(); action++ ) + action->key = 0; + + /* Null the action keys of the out action table. 
*/ + for ( ActionTable::Iter action = state->outActionTable; + action.lte(); action++ ) + action->key = 0; + + /* Null the action keys of the error action table. 
*/ + for ( ErrActionTable::Iter action = state->errActionTable; + action.lte(); action++ ) + action->ordering = 0; + + /* Null the action keys of the eof action table. */ + for ( ActionTable::Iter action = state->eofActionTable; + action.lte(); action++ ) + action->key = 0; + } +} + +/* Walk the list of states and assert that non final states do not have out + * data (out actions, out conditions, out priorities), that the SB_BOTH state + * bits are cleared, and that there are no states with zero foreign in + * transitions. */ +void FsmAp::verifyStates() +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Non final states should not have leaving data. */ + if ( ! (state->stateBits & SB_ISFINAL) ) { + assert( state->outActionTable.length() == 0 ); + assert( state->outCondSet.length() == 0 ); + assert( state->outPriorTable.length() == 0 ); + } + + /* Data used in algorithms should be cleared. */ + assert( (state->stateBits & SB_BOTH) == 0 ); + assert( state->foreignInTrans > 0 ); + } +} + +/* Compare two priority tables according to their relative priority. Only + * keys present in both tables are compared: the first such key whose + * priorities differ decides the result, -1 when the first table has the lower + * priority and 1 when it has the higher one. With no differing priority on a + * shared key the result is 0 (equal). */ +int FsmAp::comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 ) +{ + /* Looking for differing priorities on same keys. Need to concurrently + * scan the priority lists. */ + PriorTable::Iter pd1 = priorTable1; + PriorTable::Iter pd2 = priorTable2; + while ( pd1.lte() && pd2.lte() ) { + /* Check keys, advancing whichever side has the smaller key. 
*/ + if ( pd1->desc->key < pd2->desc->key ) + pd1.increment(); + else if ( pd1->desc->key > pd2->desc->key ) + pd2.increment(); + /* Keys are the same, check priorities. */ + else if ( pd1->desc->priority < pd2->desc->priority ) + return -1; + else if ( pd1->desc->priority > pd2->desc->priority ) + return 1; + else { + /* Keys and priorities are equal, advance both. */ + pd1.increment(); + pd2.increment(); + } + } + + /* No differing priorities on the same key. 
*/ + return 0; +} + +/* Compares two transitions according to priority and functions. Pointers + * should not be null. Does not consider to state or from state. Compare two + * transitions according to the data contained in the transitions. Data means + * any properties added to user transitions that may differentiate them. Since + * the base transition has no data, the default is to return equal. */ +int FsmAp::compareTransData( TransAp *trans1, TransAp *trans2 ) +{ + /* Compare the prior table. */ + int cmpRes = CmpPriorTable::compare( trans1->priorTable, + trans2->priorTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Compare longest match action tables. */ + cmpRes = CmpLmActionTable::compare(trans1->lmActionTable, + trans2->lmActionTable); + if ( cmpRes != 0 ) + return cmpRes; + + /* Compare action tables. */ + return CmpActionTable::compare(trans1->actionTable, + trans2->actionTable); +} + +/* Callback invoked when another trans (or possibly this) is added into this + * transition during the merging process. Draw in any properties of srcTrans + * into this transition. AddInTrans is called when a new transitions is made + * that will be a duplicate of another transition or a combination of several + * other transitions. AddInTrans will be called for each transition that the + * new transition is to represent. */ +void FsmAp::addInTrans( TransAp *destTrans, TransAp *srcTrans ) +{ + /* Protect against adding in from ourselves. */ + if ( srcTrans == destTrans ) { + /* Adding in ourselves, need to make a copy of the source transitions. + * The priorities are not copied in as that would have no effect. */ + destTrans->lmActionTable.setActions( LmActionTable(srcTrans->lmActionTable) ); + destTrans->actionTable.setActions( ActionTable(srcTrans->actionTable) ); + } + else { + /* Not a copy of ourself, get the functions and priorities. 
*/ + destTrans->lmActionTable.setActions( srcTrans->lmActionTable ); + destTrans->actionTable.setActions( srcTrans->actionTable ); + destTrans->priorTable.setPriors( srcTrans->priorTable ); + } +} + +/* Compare the properties of states that are embedded by users. Compares out + * priorities, out transitions, to, from, out, error and eof action tables. */ +int FsmAp::compareStateData( const StateAp *state1, const StateAp *state2 ) +{ + /* Compare the out priority table. */ + int cmpRes = CmpPriorTable:: + compare( state1->outPriorTable, state2->outPriorTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test to state action tables. */ + cmpRes = CmpActionTable::compare( state1->toStateActionTable, + state2->toStateActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test from state action tables. */ + cmpRes = CmpActionTable::compare( state1->fromStateActionTable, + state2->fromStateActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test out action tables. */ + cmpRes = CmpActionTable::compare( state1->outActionTable, + state2->outActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test out condition sets. */ + cmpRes = CmpActionSet::compare( state1->outCondSet, + state2->outCondSet ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test out error action tables. */ + cmpRes = CmpErrActionTable::compare( state1->errActionTable, + state2->errActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test eof action tables. */ + return CmpActionTable::compare( state1->eofActionTable, + state2->eofActionTable ); +} + + +/* Invoked when a state looses its final state status and the leaving + * transition embedding data should be deleted. */ +void FsmAp::clearOutData( StateAp *state ) +{ + /* Kill the out actions and priorities. 
*/ + state->outActionTable.empty(); + state->outCondSet.empty(); + state->outPriorTable.empty(); +} + +bool FsmAp::hasOutData( StateAp *state ) +{ + return ( state->outActionTable.length() > 0 || + state->outCondSet.length() > 0 || + state->outPriorTable.length() > 0 ); +} + +/* + * Setting Conditions. + */ + + +void logNewExpansion( Expansion *exp ); +void logCondSpace( CondSpace *condSpace ); + +CondSpace *FsmAp::addCondSpace( const CondSet &condSet ) +{ + CondSpace *condSpace = condData->condSpaceMap.find( condSet ); + if ( condSpace == 0 ) { + Key baseKey = condData->nextCondKey; + condData->nextCondKey += (1 << condSet.length() ) * keyOps->alphSize(); + + condSpace = new CondSpace( condSet ); + condSpace->baseKey = baseKey; + condData->condSpaceMap.insert( condSpace ); + + #ifdef LOG_CONDS + cerr << "adding new condition space" << endl; + cerr << " condition set: "; + logCondSpace( condSpace ); + cerr << endl; + cerr << " baseKey: " << baseKey.getVal() << endl; + #endif + } + return condSpace; +} + +void FsmAp::startFsmCondition( Action *condAction ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + embedCondition( startState, condAction ); +} + +void FsmAp::allTransCondition( Action *condAction ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + embedCondition( state, condAction ); +} + +void FsmAp::leaveFsmCondition( Action *condAction ) +{ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->outCondSet.insert( condAction ); +} diff --git a/ragel/fsmattach.cpp b/ragel/fsmattach.cpp new file mode 100644 index 0000000..6a90df6 --- /dev/null +++ b/ragel/fsmattach.cpp @@ -0,0 +1,425 @@ +/* + * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. 
+ * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <string.h> +#include <assert.h> +#include "fsmgraph.h" + +#include <iostream> +using namespace std; + +/* Insert a transition into an inlist. The head must be supplied. */ +void FsmAp::attachToInList( StateAp *from, StateAp *to, + TransAp *&head, TransAp *trans ) +{ + trans->ilnext = head; + trans->ilprev = 0; + + /* If in trans list is not empty, set the head->prev to trans. */ + if ( head != 0 ) + head->ilprev = trans; + + /* Now insert ourselves at the front of the list. */ + head = trans; + + /* Keep track of foreign transitions for from and to. */ + if ( from != to ) { + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * move it from the misfit list to the main list. */ + if ( to->foreignInTrans == 0 ) + stateList.append( misfitList.detach( to ) ); + } + + to->foreignInTrans += 1; + } +}; + +/* Detach a transition from an inlist. The head of the inlist must be supplied. */ +void FsmAp::detachFromInList( StateAp *from, StateAp *to, + TransAp *&head, TransAp *trans ) +{ + /* Detach in the inTransList. 
*/ + if ( trans->ilprev == 0 ) + head = trans->ilnext; + else + trans->ilprev->ilnext = trans->ilnext; + + if ( trans->ilnext != 0 ) + trans->ilnext->ilprev = trans->ilprev; + + /* Keep track of foreign transitions for from and to. */ + if ( from != to ) { + to->foreignInTrans -= 1; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions goes down to 0 then move it + * from the main list to the misfit list. */ + if ( to->foreignInTrans == 0 ) + misfitList.append( stateList.detach( to ) ); + } + } +} + +/* Attach states on the default transition, range list or on out/in list key. + * First makes a new transition. If there is already a transition out from + * fromState on the default, then will assertion fail. */ +TransAp *FsmAp::attachNewTrans( StateAp *from, StateAp *to, Key lowKey, Key highKey ) +{ + /* Make the new transition. */ + TransAp *retVal = new TransAp(); + + /* The transition is now attached. Remember the parties involved. */ + retVal->fromState = from; + retVal->toState = to; + + /* Make the entry in the out list for the transitions. */ + from->outList.append( retVal ); + + /* Set the the keys of the new trans. */ + retVal->lowKey = lowKey; + retVal->highKey = highKey; + + /* Attach using inList as the head pointer. */ + if ( to != 0 ) + attachToInList( from, to, to->inList.head, retVal ); + + return retVal; +} + +/* Attach for range lists or for the default transition. This attach should + * be used when a transition already is allocated and must be attached to a + * target state. Does not handle adding the transition into the out list. */ +void FsmAp::attachTrans( StateAp *from, StateAp *to, TransAp *trans ) +{ + assert( trans->fromState == 0 && trans->toState == 0 ); + trans->fromState = from; + trans->toState = to; + + if ( to != 0 ) { + /* Attach using the inList pointer as the head pointer. */ + attachToInList( from, to, to->inList.head, trans ); + } +} + +/* Redirect a transition away from error and towards some state. 
This is just + * like attachTrans except it requires fromState to be set and does not touch + * it. */ +void FsmAp::redirectErrorTrans( StateAp *from, StateAp *to, TransAp *trans ) +{ + assert( trans->fromState != 0 && trans->toState == 0 ); + trans->toState = to; + + if ( to != 0 ) { + /* Attach using the inList pointer as the head pointer. */ + attachToInList( from, to, to->inList.head, trans ); + } +} + +/* Detach for out/in lists or for default transition. */ +void FsmAp::detachTrans( StateAp *from, StateAp *to, TransAp *trans ) +{ + assert( trans->fromState == from && trans->toState == to ); + trans->fromState = 0; + trans->toState = 0; + + if ( to != 0 ) { + /* Detach using to's inList pointer as the head. */ + detachFromInList( from, to, to->inList.head, trans ); + } +} + + +/* Detach a state from the graph. Detaches and deletes transitions in and out + * of the state. Empties inList and outList. Removes the state from the final + * state set. A detached state becomes useless and should be deleted. */ +void FsmAp::detachState( StateAp *state ) +{ + /* Detach the in transitions from the inList list of transitions. */ + while ( state->inList.head != 0 ) { + /* Get pointers to the trans and the state. */ + TransAp *trans = state->inList.head; + StateAp *fromState = trans->fromState; + + /* Detach the transitions from the source state. */ + detachTrans( fromState, state, trans ); + + /* Ok to delete the transition. */ + fromState->outList.detach( trans ); + delete trans; + } + + /* Remove the entry points in on the machine. */ + while ( state->entryIds.length() > 0 ) + unsetEntry( state->entryIds[0], state ); + + /* Detach out range transitions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); ) { + TransList::Iter next = trans.next(); + detachTrans( state, trans->toState, trans ); + delete trans; + trans = next; + } + + /* Delete all of the out range pointers. 
*/ + state->outList.abandon(); + + /* Unset final stateness before detaching from graph. */ + if ( state->stateBits & SB_ISFINAL ) + finStateSet.remove( state ); +} + + +/* Duplicate a transition. Makes a new transition that is attached to the same + * dest as srcTrans. The new transition has functions and priority taken from + * srcTrans. Used for merging a transition in to a free spot. The trans can + * just be dropped in. It does not conflict with an existing trans and need + * not be crossed. Returns the new transition. */ +TransAp *FsmAp::dupTrans( StateAp *from, TransAp *srcTrans ) +{ + /* Make a new transition. */ + TransAp *newTrans = new TransAp(); + + /* We can attach the transition, one does not exist. */ + attachTrans( from, srcTrans->toState, newTrans ); + + /* Call the user callback to add in the original source transition. */ + addInTrans( newTrans, srcTrans ); + + return newTrans; +} + +/* In crossing, src trans and dest trans both go to existing states. Make one + * state from the sets of states that src and dest trans go to. */ +TransAp *FsmAp::fsmAttachStates( MergeData &md, StateAp *from, + TransAp *destTrans, TransAp *srcTrans ) +{ + /* The priorities are equal. We must merge the transitions. Does the + * existing trans go to the state we are to attach to? ie, are we to + * simply double up the transition? */ + StateAp *toState = srcTrans->toState; + StateAp *existingState = destTrans->toState; + + if ( existingState == toState ) { + /* The transition is a double up to the same state. Copy the src + * trans into itself. We don't need to merge in the from out trans + * data, that was done already. */ + addInTrans( destTrans, srcTrans ); + } + else { + /* The trans is not a double up. Dest trans cannot be the same as src + * trans. Set up the state set. */ + StateSet stateSet; + + /* We go to all the states the existing trans goes to, plus... 
*/ + if ( existingState->stateDictEl == 0 ) + stateSet.insert( existingState ); + else + stateSet.insert( existingState->stateDictEl->stateSet ); + + /* ... all the states that we have been told to go to. */ + if ( toState->stateDictEl == 0 ) + stateSet.insert( toState ); + else + stateSet.insert( toState->stateDictEl->stateSet ); + + /* Look for the state. If it is not there already, make it. */ + StateDictEl *lastFound; + if ( md.stateDict.insert( stateSet, &lastFound ) ) { + /* Make a new state representing the combination of states in + * stateSet. It gets added to the fill list. This means that we + * need to fill in it's transitions sometime in the future. We + * don't do that now (ie, do not recurse). */ + StateAp *combinState = addState(); + + /* Link up the dict element and the state. */ + lastFound->targState = combinState; + combinState->stateDictEl = lastFound; + + /* Add to the fill list. */ + md.fillListAppend( combinState ); + } + + /* Get the state insertted/deleted. */ + StateAp *targ = lastFound->targState; + + /* Detach the state from existing state. */ + detachTrans( from, existingState, destTrans ); + + /* Re-attach to the new target. */ + attachTrans( from, targ, destTrans ); + + /* Add in src trans to the existing transition that we redirected to + * the new state. We don't need to merge in the from out trans data, + * that was done already. */ + addInTrans( destTrans, srcTrans ); + } + + return destTrans; +} + +/* Two transitions are to be crossed, handle the possibility of either going + * to the error state. */ +TransAp *FsmAp::mergeTrans( MergeData &md, StateAp *from, + TransAp *destTrans, TransAp *srcTrans ) +{ + TransAp *retTrans = 0; + if ( destTrans->toState == 0 && srcTrans->toState == 0 ) { + /* Error added into error. 
*/ + addInTrans( destTrans, srcTrans ); + retTrans = destTrans; + } + else if ( destTrans->toState == 0 && srcTrans->toState != 0 ) { + /* Non error added into error we need to detach and reattach, */ + detachTrans( from, destTrans->toState, destTrans ); + attachTrans( from, srcTrans->toState, destTrans ); + addInTrans( destTrans, srcTrans ); + retTrans = destTrans; + } + else if ( srcTrans->toState == 0 ) { + /* Dest goes somewhere but src doesn't, just add it it in. */ + addInTrans( destTrans, srcTrans ); + retTrans = destTrans; + } + else { + /* Both go somewhere, run the actual cross. */ + retTrans = fsmAttachStates( md, from, destTrans, srcTrans ); + } + + return retTrans; +} + +/* Find the trans with the higher priority. If src is lower priority then dest then + * src is ignored. If src is higher priority than dest, then src overwrites dest. If + * the priorities are equal, then they are merged. */ +TransAp *FsmAp::crossTransitions( MergeData &md, StateAp *from, + TransAp *destTrans, TransAp *srcTrans ) +{ + TransAp *retTrans; + + /* Compare the priority of the dest and src transitions. */ + int compareRes = comparePrior( destTrans->priorTable, srcTrans->priorTable ); + if ( compareRes < 0 ) { + /* Src trans has a higher priority than dest, src overwrites dest. + * Detach dest and return a copy of src. */ + detachTrans( from, destTrans->toState, destTrans ); + retTrans = dupTrans( from, srcTrans ); + } + else if ( compareRes > 0 ) { + /* The dest trans has a higher priority, use dest. */ + retTrans = destTrans; + } + else { + /* Src trans and dest trans have the same priority, they must be merged. */ + retTrans = mergeTrans( md, from, destTrans, srcTrans ); + } + + /* Return the transition that resulted from the cross. */ + return retTrans; +} + +/* Copy the transitions in srcList to the outlist of dest. The srcList should + * not be the outList of dest, otherwise you would be copying the contents of + * srcList into itself as it's iterated: bad news. 
/* Copy the transitions in srcList into the out list of dest.
 *
 * dest's current out list and srcList are walked together with a PairIter.
 * A new list is assembled from the pieces the iterator reports and, once the
 * walk is finished, is transferred into dest->outList.
 *
 * md is passed through to crossTransitions for overlapping ranges. srcList
 * should not be the out list of dest itself, otherwise the list would be
 * copied into itself while it is being iterated. */
void FsmAp::outTransCopy( MergeData &md, StateAp *dest, TransAp *srcList )
{
	/* The destination list. It replaces dest->outList at the end. */
	TransList destList;

	/* Set up an iterator to stop at breaks. */
	PairIter<TransAp> outPair( dest->outList.head, srcList );
	for ( ; !outPair.end(); outPair++ ) {
		switch ( outPair.userState ) {
		case RangeInS1: {
			/* The existing dest transition is kept. The pair iter is the
			 * authority on the keys. It may have needed to break the dest
			 * range, so the keys are rewritten from the iterator. */
			TransAp *destTrans = outPair.s1Tel.trans;
			destTrans->lowKey = outPair.s1Tel.lowKey;
			destTrans->highKey = outPair.s1Tel.highKey;
			destList.append( destTrans );
			break;
		}
		case RangeInS2: {
			/* The range comes from the source list: duplicate the source
			 * transition so that it leaves dest. */
			TransAp *newTrans = dupTrans( dest, outPair.s2Tel.trans );

			/* Set up the transition's keys and append to the dest list. */
			newTrans->lowKey = outPair.s2Tel.lowKey;
			newTrans->highKey = outPair.s2Tel.highKey;
			destList.append( newTrans );
			break;
		}
		case RangeOverlap: {
			/* Exact overlap, cross them. The result may be the dest
			 * transition or a new copy of the source transition.
			 * NOTE(review): when crossTransitions returns a copy of src, the
			 * replaced dest transition is detached but is neither appended
			 * to destList nor deleted in the code visible here -- confirm
			 * whether it is released elsewhere. */
			TransAp *newTrans = crossTransitions( md, dest,
				outPair.s1Tel.trans, outPair.s2Tel.trans );

			/* Set up the transition's keys and append to the dest list. */
			newTrans->lowKey = outPair.s1Tel.lowKey;
			newTrans->highKey = outPair.s1Tel.highKey;
			destList.append( newTrans );
			break;
		}
		case BreakS1: {
			/* Since we are always writing to the dest trans, the dest needs
			 * to be copied when it is broken. The copy goes into the first
			 * half of the break to "break it off". The copy is written back
			 * into the iterator so that it is the one handed out next. */
			outPair.s1Tel.trans = dupTrans( dest, outPair.s1Tel.trans );
			break;
		}
		case BreakS2:
			/* Nothing is done when the source range is broken. */
			break;
		}
	}

	/* Abandon the old outList and transfer destList into it. */
	dest->outList.transfer( destList );
}
*/ + assert( dest != src ); + + /* If src is the start state, dest becomes the start state. */ + if ( src == startState ) { + unsetStartState(); + setStartState( dest ); + } + + /* For each entry point into, create an entry point into dest, when the + * state is detached, the entry points to src will be removed. */ + for ( EntryIdSet::Iter enId = src->entryIds; enId.lte(); enId++ ) + changeEntry( *enId, dest, src ); + + /* Move the transitions in inList. */ + while ( src->inList.head != 0 ) { + /* Get trans and from state. */ + TransAp *trans = src->inList.head; + StateAp *fromState = trans->fromState; + + /* Detach from src, reattach to dest. */ + detachTrans( fromState, src, trans ); + attachTrans( fromState, dest, trans ); + } +} diff --git a/ragel/fsmbase.cpp b/ragel/fsmbase.cpp new file mode 100644 index 0000000..16841d0 --- /dev/null +++ b/ragel/fsmbase.cpp @@ -0,0 +1,485 @@ +/* + * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <string.h> +#include <assert.h> +#include "fsmgraph.h" + +/* Simple singly linked list append routine for the fill list. The new state + * goes to the end of the list. 
*/ +void MergeData::fillListAppend( StateAp *state ) +{ + state->alg.next = 0; + + if ( stfillHead == 0 ) { + /* List is empty, state becomes head and tail. */ + stfillHead = state; + stfillTail = state; + } + else { + /* List is not empty, state goes after last element. */ + stfillTail->alg.next = state; + stfillTail = state; + } +} + +/* Graph constructor. */ +FsmAp::FsmAp() +: + /* No start state. */ + startState(0), + + /* Misfit accounting is a switch, turned on only at specific times. It + * controls what happens when states have no way in from the outside + * world.. */ + misfitAccounting(false) +{ +} + +/* Copy all graph data including transitions. */ +FsmAp::FsmAp( const FsmAp &graph ) +: + /* Lists start empty. Will be filled by copy. */ + stateList(), + misfitList(), + + /* Copy in the entry points, + * pointers will be resolved later. */ + entryPoints(graph.entryPoints), + startState(graph.startState), + + /* Will be filled by copy. */ + finStateSet(), + + /* Misfit accounting is only on during merging. */ + misfitAccounting(false) +{ + /* Create the states and record their map in the original state. */ + StateList::Iter origState = graph.stateList; + for ( ; origState.lte(); origState++ ) { + /* Make the new state. */ + StateAp *newState = new StateAp( *origState ); + + /* Add the state to the list. */ + stateList.append( newState ); + + /* Set the mapsTo item of the old state. */ + origState->alg.stateMap = newState; + } + + /* Derefernce all the state maps. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + /* The points to the original in the src machine. The taget's duplicate + * is in the statemap. */ + StateAp *toState = trans->toState != 0 ? trans->toState->alg.stateMap : 0; + + /* Attach The transition to the duplicate. */ + trans->toState = 0; + attachTrans( state, toState, trans ); + } + } + + /* Fix the state pointers in the entry points array. 
*/ + EntryMapEl *eel = entryPoints.data; + for ( int e = 0; e < entryPoints.length(); e++, eel++ ) { + /* Get the duplicate of the state. */ + eel->value = eel->value->alg.stateMap; + + /* Foreign in transitions must be built up when duping machines so + * increment it here. */ + eel->value->foreignInTrans += 1; + } + + /* Fix the start state pointer and the new start state's count of in + * transiions. */ + startState = startState->alg.stateMap; + startState->foreignInTrans += 1; + + /* Build the final state set. */ + StateSet::Iter st = graph.finStateSet; + for ( ; st.lte(); st++ ) + finStateSet.insert((*st)->alg.stateMap); +} + +/* Deletes all transition data then deletes each state. */ +FsmAp::~FsmAp() +{ + /* Delete all the transitions. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Iterate the out transitions, deleting them. */ + state->outList.empty(); + } + + /* Delete all the states. */ + stateList.empty(); +} + +/* Set a state final. The state has its isFinState set to true and the state + * is added to the finStateSet. */ +void FsmAp::setFinState( StateAp *state ) +{ + /* Is it already a fin state. */ + if ( state->stateBits & SB_ISFINAL ) + return; + + state->stateBits |= SB_ISFINAL; + finStateSet.insert( state ); +} + +/* Set a state non-final. The has its isFinState flag set false and the state + * is removed from the final state set. */ +void FsmAp::unsetFinState( StateAp *state ) +{ + /* Is it already a non-final state? */ + if ( ! (state->stateBits & SB_ISFINAL) ) + return; + + /* When a state looses its final state status it must relinquish all the + * properties that are allowed only for final states. */ + clearOutData( state ); + + state->stateBits &= ~ SB_ISFINAL; + finStateSet.remove( state ); +} + +/* Set and unset a state as the start state. */ +void FsmAp::setStartState( StateAp *state ) +{ + /* Sould change from unset to set. 
*/ + assert( startState == 0 ); + startState = state; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the head list. */ + if ( state->foreignInTrans == 0 ) + stateList.append( misfitList.detach( state ) ); + } + + /* Up the foreign in transitions to the state. */ + state->foreignInTrans += 1; +} + +void FsmAp::unsetStartState() +{ + /* Should change from set to unset. */ + assert( startState != 0 ); + + /* Decrement the entry's count of foreign entries. */ + startState->foreignInTrans -= 1; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. */ + if ( startState->foreignInTrans == 0 ) + misfitList.append( stateList.detach( startState ) ); + } + + startState = 0; +} + +/* Associate an id with a state. Makes the state a named entry point. Has no + * effect if the entry point is already mapped to the state. */ +void FsmAp::setEntry( int id, StateAp *state ) +{ + /* Insert the id into the state. If the state is already labelled with id, + * nothing to do. */ + if ( state->entryIds.insert( id ) ) { + /* Insert the entry and assert that it succeeds. */ + entryPoints.insertMulti( id, state ); + + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the head list. */ + if ( state->foreignInTrans == 0 ) + stateList.append( misfitList.detach( state ) ); + } + + /* Up the foreign in transitions to the state. */ + state->foreignInTrans += 1; + } +} + +/* Remove the association of an id with a state. The state looses it's entry + * point status. Assumes that the id is indeed mapped to state. */ +void FsmAp::unsetEntry( int id, StateAp *state ) +{ + /* Find the entry point in on id. 
*/ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + while ( enLow->value != state ) + enLow += 1; + + /* Remove the record from the map. */ + entryPoints.remove( enLow ); + + /* Remove the state's sense of the link. */ + state->entryIds.remove( id ); + state->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. */ + if ( state->foreignInTrans == 0 ) + misfitList.append( stateList.detach( state ) ); + } +} + +/* Remove all association of an id with states. Assumes that the id is indeed + * mapped to a state. */ +void FsmAp::unsetEntry( int id ) +{ + /* Find the entry point in on id. */ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + for ( EntryMapEl *mel = enLow; mel <= enHigh; mel++ ) { + /* Remove the state's sense of the link. */ + mel->value->entryIds.remove( id ); + mel->value->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 + * then take it off the main list and put it on the misfit list. */ + if ( mel->value->foreignInTrans == 0 ) + misfitList.append( stateList.detach( mel->value ) ); + } + } + + /* Remove the records from the entry points map. */ + entryPoints.removeMulti( enLow, enHigh ); +} + + +void FsmAp::changeEntry( int id, StateAp *to, StateAp *from ) +{ + /* Find the entry in the entry map. */ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + while ( enLow->value != from ) + enLow += 1; + + /* Change it to the new target. */ + enLow->value = to; + + /* Remove from's sense of the link. */ + from->entryIds.remove( id ); + from->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. 
*/ + if ( from->foreignInTrans == 0 ) + misfitList.append( stateList.detach( from ) ); + } + + /* Add to's sense of the link. */ + if ( to->entryIds.insert( id ) != 0 ) { + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the head list. */ + if ( to->foreignInTrans == 0 ) + stateList.append( misfitList.detach( to ) ); + } + + /* Up the foreign in transitions to the state. */ + to->foreignInTrans += 1; + } +} + + +/* Clear all entry points from a machine. */ +void FsmAp::unsetAllEntryPoints() +{ + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) { + /* Kill all the state's entry points at once. */ + if ( en->value->entryIds.length() > 0 ) { + en->value->foreignInTrans -= en->value->entryIds.length(); + + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 + * then take it off the main list and put it on the misfit + * list. */ + if ( en->value->foreignInTrans == 0 ) + misfitList.append( stateList.detach( en->value ) ); + } + + /* Clear the set of ids out all at once. */ + en->value->entryIds.empty(); + } + } + + /* Now clear out the entry map all at once. */ + entryPoints.empty(); +} + +/* Assigning an epsilon transition into final states. */ +void FsmAp::epsilonTrans( int id ) +{ + for ( StateSet::Iter fs = finStateSet; fs.lte(); fs++ ) + (*fs)->epsilonTrans.append( id ); +} + +/* Mark all states reachable from state. Traverses transitions forward. Used + * for removing states that have no path into them. */ +void FsmAp::markReachableFromHere( StateAp *state ) +{ + /* Base case: return; */ + if ( state->stateBits & SB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states that this + * state has a transition to. */ + state->stateBits |= SB_ISMARKED; + + /* Recurse on all out transitions. 
*/ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + markReachableFromHere( trans->toState ); + } +} + +void FsmAp::markReachableFromHereStopFinal( StateAp *state ) +{ + /* Base case: return; */ + if ( state->stateBits & SB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states that this + * state has a transition to. */ + state->stateBits |= SB_ISMARKED; + + /* Recurse on all out transitions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + StateAp *toState = trans->toState; + if ( toState != 0 && !toState->isFinState() ) + markReachableFromHereStopFinal( toState ); + } +} + +/* Mark all states reachable from state. Traverse transitions backwards. Used + * for removing dead end paths in graphs. */ +void FsmAp::markReachableFromHereReverse( StateAp *state ) +{ + /* Base case: return; */ + if ( state->stateBits & SB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states with + * transitions into this state. */ + state->stateBits |= SB_ISMARKED; + + /* Recurse on all items in transitions. */ + for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) + markReachableFromHereReverse( trans->fromState ); +} + +/* Determine if there are any entry points into a start state other than the + * start state. Setting starting transitions requires that the start state be + * isolated. In most cases a start state will already be isolated. */ +bool FsmAp::isStartStateIsolated() +{ + /* If there are any in transitions then the state is not isolated. */ + if ( startState->inList.head != 0 ) + return false; + + /* If there are any entry points then isolated. */ + if ( startState->entryIds.length() > 0 ) + return false; + + return true; +} + +/* Bring in other's entry points. Assumes others states are going to be + * copied into this machine. 
*/ +void FsmAp::copyInEntryPoints( FsmAp *other ) +{ + /* Use insert multi because names are not unique. */ + for ( EntryMap::Iter en = other->entryPoints; en.lte(); en++ ) + entryPoints.insertMulti( en->key, en->value ); +} +/* Number the states on the main state list consecutively from zero, in list + * order. Only stateList is walked here. */ +void FsmAp::setStateNumbers() +{ + int curNum = 0; + StateList::Iter state = stateList; + for ( ; state.lte(); state++ ) + state->alg.stateNum = curNum++; +} + +/* Clear the SB_ISFINAL bit on every state in the final state set, then empty + * the set. */ +void FsmAp::unsetAllFinStates() +{ + for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) + (*st)->stateBits &= ~ SB_ISFINAL; + finStateSet.empty(); +} +/* Or finStateBits into the stateBits of every state in the final state set. */ +void FsmAp::setFinBits( int finStateBits ) +{ + for ( int s = 0; s < finStateSet.length(); s++ ) + finStateSet.data[s]->stateBits |= finStateBits; +} + + +/* Tests the integrity of the transition lists and the fromStates. The checks + * are asserts only. */ +void FsmAp::verifyIntegrity() +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the out transitions and assert fromState is correct. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) + assert( trans->fromState == state ); + + /* Walk the inlist and assert toState is correct. */ + for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) + assert( trans->toState == state ); + } +} +/* Assert that every state on the state list can be reached going forward from + * the start state or from an entry point. The marks used are cleared again. */ +void FsmAp::verifyReachability() +{ + /* Mark all the states that can be reached + * through the set of entry points. */ + markReachableFromHere( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + markReachableFromHere( en->value ); + + /* Check that everything got marked. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Assert it got marked and then clear the mark. */ + assert( st->stateBits & SB_ISMARKED ); + st->stateBits &= ~ SB_ISMARKED; + } +} +/* Assert that every state on the state list either has a path to a final + * state or is the start state. The marks used are cleared again. */ +void FsmAp::verifyNoDeadEndStates() +{ + /* Mark all states that have paths to the final states. */ + for ( StateSet::Iter pst = finStateSet; pst.lte(); pst++ ) + markReachableFromHereReverse( *pst ); + + /* Start state gets honorary marking. 
Must be done AFTER recursive call. */ + startState->stateBits |= SB_ISMARKED; + + /* Make sure everything got marked. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Assert the state got marked and unmark it. */ + assert( st->stateBits & SB_ISMARKED ); + st->stateBits &= ~ SB_ISMARKED; + } +} diff --git a/ragel/fsmgraph.cpp b/ragel/fsmgraph.cpp new file mode 100644 index 0000000..41c4b44 --- /dev/null +++ b/ragel/fsmgraph.cpp @@ -0,0 +1,1399 @@ +/* + * Copyright 2001, 2002, 2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <assert.h> +#include <iostream> + +#include "fsmgraph.h" +#include "mergesort.h" +#include "parsedata.h" + +using std::cerr; +using std::endl; + +/* Make a new state and return it. The state is appended to the misfit list + * when misfit accounting is on, otherwise to the graph's main state list. + * It is not made a start or final state here. */ +StateAp *FsmAp::addState() +{ + /* Make the new state to return. */ + StateAp *state = new StateAp(); + + if ( misfitAccounting ) { + /* Create the new state on the misfit list. All states are created + * with no foreign in transitions. */ + misfitList.append( state ); + } + else { + /* Create the new state. 
*/ + stateList.append( state ); + } + + return state; +} + +/* Construct an FSM that is the concatenation of an array of characters. A new + * machine will be made that has len+1 states with one transition between each + * pair of consecutive states, on the single key str[i]. The first state is + * the start state and the last one is made final. */ +void FsmAp::concatFsm( Key *str, int len ) +{ + /* Make the first state and set it as the start state. */ + StateAp *last = addState(); + setStartState( last ); + + /* Attach subsequent states. */ + for ( int i = 0; i < len; i++ ) { + StateAp *newState = addState(); + attachNewTrans( last, newState, str[i], str[i] ); + last = newState; + } + + /* Make the last state the final state. */ + setFinState( last ); +} + +/* Case insensitive version of concatFsm. For each key the transitions cover + * the key itself, plus its upper case form when it is lower case and its + * lower case form when it is upper case. */ +void FsmAp::concatFsmCI( Key *str, int len ) +{ + /* Make the first state and set it as the start state. */ + StateAp *last = addState(); + setStartState( last ); + + /* Attach subsequent states. */ + for ( int i = 0; i < len; i++ ) { + StateAp *newState = addState(); + + KeySet keySet; + if ( str[i].isLower() ) + keySet.insert( str[i].toUpper() ); + if ( str[i].isUpper() ) + keySet.insert( str[i].toLower() ); + keySet.insert( str[i] ); + +/* NOTE: this i shadows the outer loop index for the duration of the inner + * loop only; the outer iteration is unaffected. */ + for ( int i = 0; i < keySet.length(); i++ ) + attachNewTrans( last, newState, keySet[i], keySet[i] ); + + last = newState; + } + + /* Make the last state the final state. */ + setFinState( last ); +} + +/* Construct a machine that matches one character. A new machine will be made + * that has two states with a single transition on chr between the states. */ +void FsmAp::concatFsm( Key chr ) +{ + /* Two states first start, second final. */ + setStartState( addState() ); + + StateAp *end = addState(); + setFinState( end ); + + /* Attach on the character. 
*/ + attachNewTrans( startState, end, chr, chr ); +} + +/* Construct a machine that matches any character in set. A new machine will + * be made that has two states and len transitions between them. The set + * should be ordered correctly according to KeyOps and should not contain + * any duplicates. */ +void FsmAp::orFsm( Key *set, int len ) +{ + /* Two states first start, second final. */ + setStartState( addState() ); + + StateAp *end = addState(); + setFinState( end ); + +/* Check the ordering requirement: keys must be strictly increasing. */ + for ( int i = 1; i < len; i++ ) + assert( set[i-1] < set[i] ); + + /* Attach a single-key transition for each key in the set. */ + for ( int i = 0; i < len; i++ ) + attachNewTrans( startState, end, set[i], set[i] ); +} + +/* Construct a machine that matches a range of characters. A new machine will + * be made with two states and a range transition between them. The range will + * match any characters from low to high inclusive. Low should be less than or + * equal to high otherwise undefined behaviour results. */ +void FsmAp::rangeFsm( Key low, Key high ) +{ + /* Two states first start, second final. */ + setStartState( addState() ); + + StateAp *end = addState(); + setFinState( end ); + + /* Attach using the range of characters. */ + attachNewTrans( startState, end, low, high ); +} + +/* Construct a machine that matches a repeated range of characters: a single + * state that is both start and final, with a range transition back to + * itself. */ +void FsmAp::rangeStarFsm( Key low, Key high) +{ + /* One state which is final and is the start state. */ + setStartState( addState() ); + setFinState( startState ); + + /* Attach start to start using range of characters. */ + attachNewTrans( startState, startState, low, high ); +} + +/* Construct a machine that matches the empty string. A new machine will be + * made with only one state. The new state will be both a start and final + * state. 
Fsm operations must be done on machines with the same alphabet + * signedness. */ +void FsmAp::lambdaFsm( ) +{ + /* Give it one state with no transitions making it + * the start state and final state. */ + setStartState( addState() ); + setFinState( startState ); +} + +/* Construct a machine that matches nothing at all. A new machine will be + * made with only one state. It will not be final. */ +void FsmAp::emptyFsm( ) +{ + /* Give it one state with no transitions making it + * the start state. It is deliberately not made final. */ + setStartState( addState() ); +} +/* For every transition out of destState that has a target state, set its + * action table from srcState's outActionTable and its priority table from + * srcState's outPriorTable. Transitions with no target are skipped. */ +void FsmAp::transferOutData( StateAp *destState, StateAp *srcState ) +{ + for ( TransList::Iter trans = destState->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) { + /* Get the actions data from the outActionTable. */ + trans->actionTable.setActions( srcState->outActionTable ); + + /* Get the priorities from the outPriorTable. */ + trans->priorTable.setPriors( srcState->outPriorTable ); + } + } +} + +/* Kleene star operator. Makes this machine the kleene star of itself. Any + * transitions made going out of the machine and back into itself are treated + * as leaving transitions: those merges are done with mergeStatesLeaving. */ +void FsmAp::starOp( ) +{ + /* For the merging process. */ + MergeData md; + + /* Turn on misfit accounting to possibly catch the old start state. */ + setMisfitAccounting( true ); + + /* Create the new start state. It will be set final after the merging + * of the final states with the start state is complete. */ + StateAp *prevStartState = startState; + unsetStartState(); + setStartState( addState() ); + + /* Merge the new start state with the old one to isolate it. */ + mergeStates( md, startState, prevStartState ); + + /* Merge the start state into all final states. Except the start state on + * the first pass. 
If the start state is set final we will be doubling up + * its transitions, which will get transferred to any final states that + * follow it in the final state set. This will be determined by the order + * of items in the final state set. To prevent this we just merge with the + * start on a second pass. */ + for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) { + if ( *st != startState ) + mergeStatesLeaving( md, *st, startState ); + } + + /* Now it is safe to merge the start state with itself (provided it + * is set final). */ + if ( startState->isFinState() ) + mergeStatesLeaving( md, startState, startState ); + + /* Now ensure the new start state is a final state. */ + setFinState( startState ); + + /* Fill in any states that were newed up as combinations of others. */ + fillInStates( md ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} +/* Repeat this machine: concatenates times-1 copies of the machine as it is on + * entry onto the end of this machine, so it occurs times times in sequence. + * Every copy made here is handed to doConcat. */ +void FsmAp::repeatOp( int times ) +{ + /* Must be 1 and up. 0 produces null machine and requires deleting this. */ + assert( times > 0 ); + + /* A repeat of one does absolutely nothing. */ + if ( times == 1 ) + return; + + /* Make a machine to make copies from. */ + FsmAp *copyFrom = new FsmAp( *this ); + + /* Concatenate duplicates onto the end up until before the last. */ + for ( int i = 1; i < times-1; i++ ) { + FsmAp *dup = new FsmAp( *copyFrom ); + doConcat( dup, 0, false ); + } + + /* Now use the copyFrom on the end. */ + doConcat( copyFrom, 0, false ); +} +/* Repeat with optional concatenation: the start state is made final and + * times-1 copies of the machine as it is on entry are attached using + * doConcat with its optional flag set, each one from the final states + * contributed by the previous copy. */ +void FsmAp::optionalRepeatOp( int times ) +{ + /* Must be 1 and up. 0 produces null machine and requires deleting this. */ + assert( times > 0 ); + + /* A repeat of one optional merely allows zero string. */ + if ( times == 1 ) { + setFinState( startState ); + return; + } + + /* Make a machine to make copies from. */ + FsmAp *copyFrom = new FsmAp( *this ); + + /* The state set used in the from end of the concatenation. 
Starts with + * the initial final state set, then after each concatenation, gets set to + * the final states that come from the duplicate. */ + StateSet lastFinSet( finStateSet ); + + /* Set the start state final to allow zero copies. */ + setFinState( startState ); + + /* Concatenate duplicates onto the end up until before the last. */ + for ( int i = 1; i < times-1; i++ ) { + /* Make a duplicate for concating and set the fin bits to graph 2 so we + * can pick out its final states after the optional style concat. */ + FsmAp *dup = new FsmAp( *copyFrom ); + dup->setFinBits( SB_GRAPH2 ); + doConcat( dup, &lastFinSet, true ); + + /* Clear the last final state set and make the new one by taking only + * the final states that come from graph 2. NOTE: the i of the loop + * below shadows the outer loop index inside that loop only. */ + lastFinSet.empty(); + for ( int i = 0; i < finStateSet.length(); i++ ) { + /* If the state came from graph 2, add it to the last set and clear + * the bits. */ + StateAp *fs = finStateSet[i]; + if ( fs->stateBits & SB_GRAPH2 ) { + lastFinSet.insert( fs ); + fs->stateBits &= ~SB_GRAPH2; + } + } + } + + /* Now use the copyFrom on the end, no bits set, no bits to clear. */ + doConcat( copyFrom, &lastFinSet, true ); +} + + +/* Fsm concatenation worker. Supports treating the concatenation as optional, + * which essentially leaves the final states of machine one as final. + * fromStates is the set of states to merge other's start state into; when + * it is null a copy of this machine's final state set is used. Other is + * deleted. */ +void FsmAp::doConcat( FsmAp *other, StateSet *fromStates, bool optional ) +{ + /* For the merging process. NOTE(review): startStateSet is declared here + * but is not referenced anywhere else in this function. */ + StateSet finStateSetCopy, startStateSet; + MergeData md; + + /* Turn on misfit accounting for both graphs. */ + setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Get the other's start state. */ + StateAp *otherStartState = other->startState; + + /* Unset other's start state before bringing in the entry points. */ + other->unsetStartState(); + + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( other ); + other->entryPoints.empty(); + + /* Bring in other's states into our state lists. 
*/ + stateList.append( other->stateList ); + misfitList.append( other->misfitList ); + + /* If from states is not set, then get a copy of our final state set before + * we clobber it and use it instead. */ + if ( fromStates == 0 ) { + finStateSetCopy = finStateSet; + fromStates = &finStateSetCopy; + } + + /* Unless the concatenation is optional, unset all of our final states. + * Either way, take on the final states from other. */ + if ( !optional ) + unsetAllFinStates(); + finStateSet.insert( other->finStateSet ); + + /* Since other's lists are empty, we can delete the fsm without + * affecting any states. */ + delete other; + + /* Merge our former final states with the start state of other. */ + for ( int i = 0; i < fromStates->length(); i++ ) { + StateAp *state = fromStates->data[i]; + + /* Merge the former final state with other's start state. */ + mergeStatesLeaving( md, state, otherStartState ); + + /* If the former final state was not reset final then we must clear + * the state's out trans data. If it got reset final then it gets to + * keep its out trans data. This must be done before fillInStates gets + * called to prevent the data from being sourced. */ + if ( ! state->isFinState() ) + clearOutData( state ); + } + + /* Fill in any new states made from merging. */ + fillInStates( md ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Concatenates other to the end of this machine. Other is deleted. Any + * transitions made leaving this machine and entering into other are treated + * as leaving transitions: the worker merges with mergeStatesLeaving. */ +void FsmAp::concatOp( FsmAp *other ) +{ + /* Hand over to the concatenation worker: merge from our own final states + * (null fromStates) and do not treat the concatenation as optional. */ + doConcat( other, 0, false ); +} + +/* Worker shared by the union, intersection and subtraction operators. Moves + * other's entry points, states and final states into this machine, deletes + * other, and merges both former start states into a newly added start + * state. Misfit accounting is left to the callers. */ +void FsmAp::doOr( FsmAp *other ) +{ + /* For the merging process. 
*/ + MergeData md; + + /* Build a state set consisting of both start states. */ + StateSet startStateSet; + startStateSet.insert( startState ); + startStateSet.insert( other->startState ); + + /* Both of the original start states lose their start state status. */ + unsetStartState(); + other->unsetStartState(); + + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( other ); + other->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other + * into this. No states will be deleted. */ + stateList.append( other->stateList ); + misfitList.append( other->misfitList ); + + /* Move the final set data from other into this. */ + finStateSet.insert(other->finStateSet); + other->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete other; + + /* Create a new start state. */ + setStartState( addState() ); + + /* Merge the two former start states into the new start state. */ + mergeStates( md, startState, startStateSet.data, startStateSet.length() ); + + /* Fill in any new states made from merging. */ + fillInStates( md ); +} + +/* Unions other with this machine. Other is deleted. */ +void FsmAp::unionOp( FsmAp *other ) +{ + /* Turn on misfit accounting for both graphs. */ + setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Call worker Or routine. */ + doOr( other ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Intersects other with this machine. Other is deleted. */ +void FsmAp::intersectOp( FsmAp *other ) +{ + /* Turn on misfit accounting for both graphs. */ + setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Set the fin bits on this and other to want each other: our final states + * get SB_GRAPH1 and other's get SB_GRAPH2. */ + setFinBits( SB_GRAPH1 ); + other->setFinBits( SB_GRAPH2 ); + + /* Call worker Or routine. */ + doOr( other ); + + /* Unset any final states that are no longer to + * be final due to final bits. 
*/ + unsetIncompleteFinals(); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); + + /* Remove states that have no path to a final state. */ + removeDeadEndStates(); +} + +/* Set subtracts other machine from this machine. Other is deleted. */ +void FsmAp::subtractOp( FsmAp *other ) +{ + /* Turn on misfit accounting for both graphs. */ + setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Set the fin bits of other to be killers. */ + other->setFinBits( SB_GRAPH1 ); + + /* Call worker Or routine. */ + doOr( other ); + + /* Unset any final states that are no longer to + * be final due to final bits. */ + unsetKilledFinals(); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); + + /* Remove states that have no path to a final state. */ + removeDeadEndStates(); +} +/* Return true if eptVect is non-null and one of its elements has state as its + * target, false otherwise. */ +bool FsmAp::inEptVect( EptVect *eptVect, StateAp *state ) +{ + if ( eptVect != 0 ) { + /* Vect is there, walk it looking for state. */ + for ( int i = 0; i < eptVect->length(); i++ ) { + if ( eptVect->data[i].targ == state ) + return true; + } + } + return false; +} + +/* Fill epsilon vectors in a root state from a given starting point. Employs + * a depth first search through the graph of epsilon transitions. + * parentLeaving says whether the path taken to reach from already counts as + * leaving. */ +void FsmAp::epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving ) +{ + /* Walk the epsilon transitions out of the state. */ + for ( EpsilonTrans::Iter ep = from->epsilonTrans; ep.lte(); ep++ ) { + /* Find the entry point, if it does not resolve, ignore it. */ + EntryMapEl *enLow, *enHigh; + if ( entryPoints.findMulti( *ep, enLow, enHigh ) ) { + /* Loop the targets. */ + for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) { + /* Do not add the state currently being expanded (from) or + * states already in root's eptVect. */ + StateAp *targ = en->value; + if ( targ != from && !inEptVect(root->eptVect, targ) ) { + /* Maybe need to create the eptVect. 
*/ + if ( root->eptVect == 0 ) + root->eptVect = new EptVect(); + + /* If moving to a different graph or if any parent is + * leaving then we are leaving. */ + bool leaving = parentLeaving || + root->owningGraph != targ->owningGraph; + + /* All ok, add the target epsilon and recurse. */ + root->eptVect->append( EptVectEl(targ, leaving) ); + epsilonFillEptVectFrom( root, targ, leaving ); + } + } + } + } +} +/* For every epsilon target that itself has an eptVect (so it will be both read + * from and merged into), create a shadow state once by merging the target + * into a new state, and point the referring eptVect entries at that shadow. */ +void FsmAp::shadowReadWriteStates( MergeData &md ) +{ + /* Init isolatedShadow algorithm data. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->isolatedShadow = 0; + + /* Any states that may be both read from and written to must + * be shadowed. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Find such states by looping through eptVect lists, which give us + * the states that will be read from. May cause us to visit the states + * that we are interested in more than once. */ + if ( st->eptVect != 0 ) { + /* For all states that will be read from. */ + for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { + /* Check for read and write to the same state. */ + StateAp *targ = ept->targ; + if ( targ->eptVect != 0 ) { + /* State is to be written to, if the shadow is not already + * there, create it. */ + if ( targ->isolatedShadow == 0 ) { + StateAp *shadow = addState(); + mergeStates( md, shadow, targ ); + targ->isolatedShadow = shadow; + } + + /* Write shadow into the state vector so that it is the + * state that the epsilon transition will read from. */ + ept->targ = targ->isolatedShadow; + } + } + } + } +} +/* Resolve all epsilon transitions: compute each state's eptVect, shadow the + * states that are both read from and written to, merge the targets into + * their source states, then free the eptVects and clear the epsilonTrans + * lists. */ +void FsmAp::resolveEpsilonTrans( MergeData &md ) +{ + /* Walk the state list and invoke recursive worker on each state. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + epsilonFillEptVectFrom( st, st, false ); + + /* Prevent reading from and writing to the same state. 
*/ + shadowReadWriteStates( md ); + + /* For all states that have epsilon transitions out, draw the transitions, + * clear the epsilon transitions. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* If the state has an epsilon target vector, merge its targets in. */ + if ( st->eptVect != 0 ) { + /* Merge all the epsilon targets into the state. */ + for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { + if ( ept->leaving ) + mergeStatesLeaving( md, st, ept->targ ); + else + mergeStates( md, st, ept->targ ); + } + + /* Clean up the target list. */ + delete st->eptVect; + st->eptVect = 0; + } + + /* Clear the epsilon transitions vector. */ + st->epsilonTrans.empty(); + } +} +/* Resolve the epsilon transitions of a single machine. All states are put in + * owning graph 0 first, so no epsilon merge is treated as leaving on account + * of crossing between graphs. */ +void FsmAp::epsilonOp() +{ + /* For merging process. */ + MergeData md; + + setMisfitAccounting( true ); + + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->owningGraph = 0; + + /* Perform merges. */ + resolveEpsilonTrans( md ); + + /* Epsilons can cause merges which leave behind unreachable states. */ + fillInStates( md ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Make a new machine by joining together a bunch of machines without making + * any transitions between them. A negative finalId results in there being no + * final id. The other machines are deleted. */ +void FsmAp::joinOp( int startId, int finalId, FsmAp **others, int numOthers ) +{ + /* For the merging process. */ + MergeData md; + + /* Set the owning machines. Start at one. Zero is reserved for the start + * and final states. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->owningGraph = 1; + for ( int m = 0; m < numOthers; m++ ) { + for ( StateList::Iter st = others[m]->stateList; st.lte(); st++ ) + st->owningGraph = 2+m; + } + + /* All machines lose start state status. */ + unsetStartState(); + for ( int m = 0; m < numOthers; m++ ) + others[m]->unsetStartState(); + + /* Bring the other machines into this. 
*/ + for ( int m = 0; m < numOthers; m++ ) { + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( others[m] ); + others[m]->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other into + * this. No states will be deleted. The other machine is expected to + * have nothing on its misfit list. */ + stateList.append( others[m]->stateList ); + assert( others[m]->misfitList.length() == 0 ); + + /* Move the final set data from other into this. */ + finStateSet.insert( others[m]->finStateSet ); + others[m]->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete others[m]; + } + + /* Look up the start entry point. */ + EntryMapEl *enLow = 0, *enHigh = 0; + bool findRes = entryPoints.findMulti( startId, enLow, enHigh ); + if ( ! findRes ) { + /* No start state. Set a default one and proceed with the join. Note + * that the result of the join will be a very uninteresting machine. */ + setStartState( addState() ); + } + else { + /* There is at least one start state, create a state that will become + * the new start state. */ + StateAp *newStart = addState(); + setStartState( newStart ); + + /* The start state is in an owning machine class all its own. */ + newStart->owningGraph = 0; + + /* Create the set of states to merge from. */ + StateSet stateSet; + for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) + stateSet.insert( en->value ); + + /* Merge in the set of start states into the new start state. */ + mergeStates( md, newStart, stateSet.data, stateSet.length() ); + } + + /* Take a copy of the final state set, before unsetting them all. This + * will allow us to call clearOutData on the states that don't get + * final state status back. */ + StateSet finStateSetCopy = finStateSet; + + /* Now all final states are unset. */ + unsetAllFinStates(); + + if ( finalId >= 0 ) { + /* Create the implicit final state. 
*/ + StateAp *finState = addState(); + setFinState( finState ); + + /* Assign an entry into the final state on the final state entry id. Note + * that there may already be an entry on this id. That's ok. Also set the + * final state owning machine id. It's in a class all its own. */ + setEntry( finalId, finState ); + finState->owningGraph = 0; + } + + /* Hand over to workers for resolving epsilon trans. This will merge states + * with the targets of their epsilon transitions. */ + resolveEpsilonTrans( md ); + + /* Clear the out data of any states that did not get final state status + * back. */ + for ( StateSet::Iter st = finStateSetCopy; st.lte(); st++ ) { + if ( !((*st)->stateBits & SB_ISFINAL) ) + clearOutData( *st ); + } + + /* Fill in any new states made from merging. */ + fillInStates( md ); + + /* Joining can be messy. Instead of having misfit accounting on (which is + * tricky here) do a full cleaning. */ + removeUnreachableStates(); +} +/* Absorb the other machines into this one without connecting them: their start + * states are unset and their entry points, states and final states are moved + * into this machine, then each one is deleted. This machine's own start + * state is not touched. */ +void FsmAp::globOp( FsmAp **others, int numOthers ) +{ + /* All other machines lose start state status. */ + for ( int m = 0; m < numOthers; m++ ) + others[m]->unsetStartState(); + + /* Bring the other machines into this. */ + for ( int m = 0; m < numOthers; m++ ) { + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( others[m] ); + others[m]->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other into + * this. No states will be deleted. The other machine is expected to + * have nothing on its misfit list. */ + stateList.append( others[m]->stateList ); + assert( others[m]->misfitList.length() == 0 ); + + /* Move the final set data from other into this. */ + finStateSet.insert( others[m]->finStateSet ); + others[m]->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete others[m]; + } +} +/* Make the entry points deterministic: wherever several states share one entry + * key, replace them with a single new entry state that the old targets are + * merged into. */ +void FsmAp::deterministicEntry() +{ + /* For the merging process. */ + MergeData md; + + /* States may lose their entry points, turn on misfit accounting. 
*/ + setMisfitAccounting( true ); + + /* Get a copy of the entry map then clear all the entry points. As we + * iterate the old entry map finding duplicates we will add the entry + * points for the new states that we create. */ + EntryMap prevEntry = entryPoints; + unsetAllEntryPoints(); + + for ( int enId = 0; enId < prevEntry.length(); ) { + /* Count the number of states on this entry key. This scans the run of + * equal keys starting at enId, so it relies on equal keys being + * adjacent in the map. */ + int highId = enId; + while ( highId < prevEntry.length() && prevEntry[enId].key == prevEntry[highId].key ) + highId += 1; + + int numIds = highId - enId; + if ( numIds == 1 ) { + /* Only a single entry point, just set the entry. */ + setEntry( prevEntry[enId].key, prevEntry[enId].value ); + } + else { + /* Multiple entry points, need to create a new state and merge in + * all the targets of entry points. */ + StateAp *newEntry = addState(); + for ( int en = enId; en < highId; en++ ) + mergeStates( md, newEntry, prevEntry[en].value ); + + /* Add the new state as the single entry point. */ + setEntry( prevEntry[enId].key, newEntry ); + } + + enId += numIds; + } + + /* States that lost their entry points may now be unreachable. Remove the + * misfits and turn off misfit accounting. NOTE(review): unlike the other + * merging operators in this file, fillInStates( md ) is not called after + * the merges here; confirm that this is intended. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Unset any final states that are no longer to be final due to final bits: a + * final state carrying SB_GRAPH1 is made non-final. SB_GRAPH1 is cleared on + * every state that was final on entry. */ +void FsmAp::unsetKilledFinals() +{ + /* Duplicate the final state set before we begin modifying it. */ + StateSet fin( finStateSet ); + + for ( int s = 0; s < fin.length(); s++ ) { + /* Check for killing bit. */ + StateAp *state = fin.data[s]; + if ( state->stateBits & SB_GRAPH1 ) { + /* One final state is a killer, set to non-final. */ + unsetFinState( state ); + } + + /* Clear all killing bits. Non final states should never have had those + * state bits set in the first place. */ + state->stateBits &= ~SB_GRAPH1; + } +} + +/* Unset any final states that are no longer to be final due to final bits. 
*/ +void FsmAp::unsetIncompleteFinals() +{ + /* Duplicate the final state set before we begin modifying it. */ + StateSet fin( finStateSet ); + + for ( int s = 0; s < fin.length(); s++ ) { + /* Check for one set but not the other: some bit of SB_BOTH is set, but + * not all of SB_BOTH. */ + StateAp *state = fin.data[s]; + if ( state->stateBits & SB_BOTH && + (state->stateBits & SB_BOTH) != SB_BOTH ) + { + /* One state wants the other but it is not there. */ + unsetFinState( state ); + } + + /* Clear wanting bits. Non final states should never have had those + * state bits set in the first place. */ + state->stateBits &= ~SB_BOTH; + } +} + +/* Ensure that the start state is free of entry points (aside from the fact + * that it is the start state). If the start state has entry points then make a + * new start state by merging with the old one. Useful before modifying start + * transitions. If the existing start state has any entry points other than the + * start state entry then modifying its transitions changes more than the start + * transitions. So isolate the start state by separating it out such that it + * only has start stateness as its entry point. */ +void FsmAp::isolateStartState( ) +{ + /* For the merging process. */ + MergeData md; + + /* Bail out if the start state is already isolated. */ + if ( isStartStateIsolated() ) + return; + + /* Turn on misfit accounting to possibly catch the old start state. */ + setMisfitAccounting( true ); + + /* This will be the new start state. The existing start + * state is merged with it. */ + StateAp *prevStartState = startState; + unsetStartState(); + setStartState( addState() ); + + /* Merge the new start state with the old one to isolate it. */ + mergeStates( md, startState, prevStartState ); + + /* The state fill list (md.stfillHead) and md.stateDict will be empty + * because the merging of the old start state into the new one will not + * have any conflicting transitions. That is why fillInStates is not + * called here. */ + assert( md.stateDict.treeSize == 0 ); + assert( md.stfillHead == 0 ); + + /* The old start state may be unreachable. 
Remove the misfits and turn off + * misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} +/* Logging helper, compiled only when LOG_CONDS is defined: print the action + * names of a condition space to cerr, last to first and comma separated, or + * <empty> for a null space. */ +#ifdef LOG_CONDS +void logCondSpace( CondSpace *condSpace ) +{ + if ( condSpace == 0 ) + cerr << "<empty>"; + else { + for ( CondSet::Iter csi = condSpace->condSet.last(); csi.gtb(); csi-- ) { + if ( ! csi.last() ) + cerr << ','; + (*csi)->actionName( cerr ); + } + } +} +/* Logging helper, compiled only when LOG_CONDS is defined: dump the fields of + * a newly created expansion to cerr. */ +void logNewExpansion( Expansion *exp ) +{ + cerr << "created expansion:" << endl; + cerr << " range: " << exp->lowKey.getVal() << " .. " << + exp->highKey.getVal() << endl; + + cerr << " fromCondSpace: "; + logCondSpace( exp->fromCondSpace ); + cerr << endl; + cerr << " fromVals: " << exp->fromVals << endl; + + cerr << " toCondSpace: "; + logCondSpace( exp->toCondSpace ); + cerr << endl; + cerr << " toValsList: "; + for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ ) + cerr << " " << *to; + cerr << endl; +} +#endif + +/* For every key range where an out transition of destState overlaps a + * condition range of srcState, append to expansionList an Expansion that + * carries a copy of the transition (fromState cleared, toState kept), no + * from condition space, and srcState's condition space with every + * combination of its condition values as targets. */ +void FsmAp::findTransExpansions( ExpansionList &expansionList, + StateAp *destState, StateAp *srcState ) +{ + PairIter<TransAp, StateCond> transCond( destState->outList.head, + srcState->stateCondList.head ); + for ( ; !transCond.end(); transCond++ ) { + if ( transCond.userState == RangeOverlap ) { + Expansion *expansion = new Expansion( transCond.s1Tel.lowKey, + transCond.s1Tel.highKey ); + expansion->fromTrans = new TransAp(*transCond.s1Tel.trans); + expansion->fromTrans->fromState = 0; + expansion->fromTrans->toState = transCond.s1Tel.trans->toState; + expansion->fromCondSpace = 0; + expansion->fromVals = 0; + CondSpace *srcCS = transCond.s2Tel.trans->condSpace; + expansion->toCondSpace = srcCS; + +/* Enumerate every combination of values of the source conditions. + * NOTE(review): the shift is on the int literal 1, which presumably + * assumes the condition set stays well below the width of int. */ + long numTargVals = (1 << srcCS->condSet.length()); + for ( long targVals = 0; targVals < numTargVals; targVals++ ) + expansion->toValsList.append( targVals ); + + #ifdef LOG_CONDS + logNewExpansion( expansion ); + #endif + expansionList.append( expansion ); + } + } +} +/* Look for out transitions of state that overlap the key range lowKey to + * highKey as it is laid out inside fromCondSpace for the condition values + * fromVals, and append an Expansion towards toCondSpace/toValsList to + * expansionList for each overlap found. */ +void FsmAp::findCondExpInTrans( ExpansionList &expansionList, StateAp *state, + Key lowKey, 
Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace, + long fromVals, LongVect &toValsList ) +{ + /* Build a search range: [lowKey, highKey] taken relative to minKey and shifted to fromCondSpace->baseKey + fromVals * alphSize. Every transition of state overlapping that range yields one Expansion from (fromCondSpace, fromVals) to (toCondSpace, toValsList). */ TransAp searchTrans; + searchTrans.lowKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() + + (lowKey - keyOps->minKey); + searchTrans.highKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() + + (highKey - keyOps->minKey); + /* The search transition is the entire second list handed to the pair iterator. */ searchTrans.prev = searchTrans.next = 0; + + PairIter<TransAp> pairIter( state->outList.head, &searchTrans ); + for ( ; !pairIter.end(); pairIter++ ) { + if ( pairIter.userState == RangeOverlap ) { + /* The expansion keeps the un-shifted keys; the cond space offsets are re-applied in doExpand and doRemove. */ Expansion *expansion = new Expansion( lowKey, highKey ); + expansion->fromTrans = new TransAp(*pairIter.s1Tel.trans); + expansion->fromTrans->fromState = 0; + expansion->fromTrans->toState = pairIter.s1Tel.trans->toState; + expansion->fromCondSpace = fromCondSpace; + expansion->fromVals = fromVals; + expansion->toCondSpace = toCondSpace; + expansion->toValsList = toValsList; + + expansionList.append( expansion ); + #ifdef LOG_CONDS + logNewExpansion( expansion ); + #endif + } + } +} + +/* Pair the state cond lists of destState and srcState. Where their ranges overlap and the src cond set holds items missing from the dest cond set, find the expansions that take destState's transitions from the dest cond space to the merged cond space. */ void FsmAp::findCondExpansions( ExpansionList &expansionList, + StateAp *destState, StateAp *srcState ) +{ + PairIter<StateCond, StateCond> condCond( destState->stateCondList.head, + srcState->stateCondList.head ); + for ( ; !condCond.end(); condCond++ ) { + if ( condCond.userState == RangeOverlap ) { + /* Loop over all existing condVals. */ + CondSet &destCS = condCond.s1Tel.trans->condSpace->condSet; + long destLen = destCS.length(); + + /* Find the items in src cond set that are not in dest + * cond set. These are the items that we must expand. 
*/ + CondSet srcOnlyCS = condCond.s2Tel.trans->condSpace->condSet; + for ( CondSet::Iter dcsi = destCS; dcsi.lte(); dcsi++ ) + srcOnlyCS.remove( *dcsi ); + long srcOnlyLen = srcOnlyCS.length(); + + if ( srcOnlyCS.length() > 0 ) { + #ifdef LOG_CONDS + cerr << "there are " << srcOnlyCS.length() << " item(s) that are " + "only in the srcCS" << endl; + #endif + + /* The target cond set is the union of the dest and src cond sets. */ CondSet mergedCS = destCS; + mergedCS.insert( condCond.s2Tel.trans->condSpace->condSet ); + + CondSpace *fromCondSpace = addCondSpace( destCS ); + CondSpace *toCondSpace = addCondSpace( mergedCS ); + + /* Loop all values in the dest space. Each set bit of destVals is moved from its position in destCS to the position of the same action in mergedCS. NOTE(review): the shifts use the int literal 1, so the cond set lengths must stay below the width of int. */ + for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) { + long basicVals = 0; + for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) { + if ( destVals & (1 << csi.pos()) ) { + Action **cim = mergedCS.find( *csi ); + long bitPos = (cim - mergedCS.data); + basicVals |= 1 << bitPos; + } + } + + /* Loop all new values: every on/off combination of the src-only items, OR-ed onto the remapped dest bits. */ + LongVect expandToVals; + for ( long soVals = 0; soVals < (1 << srcOnlyLen); soVals++ ) { + long targVals = basicVals; + for ( CondSet::Iter csi = srcOnlyCS; csi.lte(); csi++ ) { + if ( soVals & (1 << csi.pos()) ) { + Action **cim = mergedCS.find( *csi ); + long bitPos = (cim - mergedCS.data); + targVals |= 1 << bitPos; + } + } + expandToVals.append( targVals ); + } + + findCondExpInTrans( expansionList, destState, + condCond.s1Tel.lowKey, condCond.s1Tel.highKey, + fromCondSpace, toCondSpace, destVals, expandToVals ); + } + } + } + } +} + +/* Apply a list of expansions to destState: for every expansion and every value in its toValsList, re-key the expansion's transition copy into the target cond space and copy it into destState with outTransCopy. */ void FsmAp::doExpand( MergeData &md, StateAp *destState, ExpansionList &expList1 ) +{ + for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) { + for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ ) { + long targVals = *to; + + /* We will use the copy of the transition that was made when the + * expansion was created. It will get used multiple times. Each + * time we must set up the keys, everything else is constant + * and already prepared. 
*/ + TransAp *srcTrans = exp->fromTrans; + + /* Key = base of the target cond space + targVals * alphabet size + offset of the original key from minKey. */ srcTrans->lowKey = exp->toCondSpace->baseKey + + targVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey); + srcTrans->highKey = exp->toCondSpace->baseKey + + targVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey); + + /* Wrap the single transition in a temporary list for outTransCopy. abandon() presumably drops the list's hold on it without deleting it (the Expansion owns fromTrans) - confirm against DList. */ TransList srcList; + srcList.append( srcTrans ); + outTransCopy( md, destState, srcList.head ); + srcList.abandon(); + } + } +} + + +/* For each expansion, compute the key range it was expanded from (the plain keys when fromCondSpace is 0, otherwise the keys shifted into fromCondSpace for fromVals) and rebuild destState's out list without the transitions that overlap that range. */ void FsmAp::doRemove( MergeData &md, StateAp *destState, ExpansionList &expList1 ) +{ + for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) { + Removal removal; + if ( exp->fromCondSpace == 0 ) { + removal.lowKey = exp->lowKey; + removal.highKey = exp->highKey; + } + else { + removal.lowKey = exp->fromCondSpace->baseKey + + exp->fromVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey); + removal.highKey = exp->fromCondSpace->baseKey + + exp->fromVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey); + } + removal.next = 0; + + TransList destList; + PairIter<TransAp, Removal> pairIter( destState->outList.head, &removal ); + for ( ; !pairIter.end(); pairIter++ ) { + switch ( pairIter.userState ) { + case RangeInS1: { + /* Outside the removal range: keep the transition, trimmed to the range the iterator reports. */ TransAp *destTrans = pairIter.s1Tel.trans; + destTrans->lowKey = pairIter.s1Tel.lowKey; + destTrans->highKey = pairIter.s1Tel.highKey; + destList.append( destTrans ); + break; + } + case RangeInS2: + break; + case RangeOverlap: { + /* Inside the removal range: detach the transition from its states and delete it. */ TransAp *trans = pairIter.s1Tel.trans; + detachTrans( trans->fromState, trans->toState, trans ); + delete trans; + break; + } + case BreakS1: { + /* The transition is being split: give the iterator a duplicate for the broken-off piece. */ pairIter.s1Tel.trans = dupTrans( destState, + pairIter.s1Tel.trans ); + break; + } + case BreakS2: + break; + } + } + destState->outList.transfer( destList ); + } +} + +/* Merge srcState's state cond list into destState's. Ranges only in dest are kept, ranges only in src are copied in, and overlapping ranges get the cond space of the union of both cond sets. */ void FsmAp::mergeStateConds( StateAp *destState, StateAp *srcState ) +{ + StateCondList destList; + PairIter<StateCond> pairIter( destState->stateCondList.head, + srcState->stateCondList.head ); + for ( ; !pairIter.end(); pairIter++ ) { + switch ( pairIter.userState ) { + case RangeInS1: { +
StateCond *destCond = pairIter.s1Tel.trans; + destCond->lowKey = pairIter.s1Tel.lowKey; + destCond->highKey = pairIter.s1Tel.highKey; + destList.append( destCond ); + break; + } + case RangeInS2: { + /* Only in src: add a copy of the src state cond, trimmed to the reported range. */ StateCond *newCond = new StateCond( *pairIter.s2Tel.trans ); + newCond->lowKey = pairIter.s2Tel.lowKey; + newCond->highKey = pairIter.s2Tel.highKey; + destList.append( newCond ); + break; + } + case RangeOverlap: { + /* In both: the dest state cond takes the cond space of the union of the two cond sets. */ StateCond *destCond = pairIter.s1Tel.trans; + StateCond *srcCond = pairIter.s2Tel.trans; + CondSet mergedCondSet; + mergedCondSet.insert( destCond->condSpace->condSet ); + mergedCondSet.insert( srcCond->condSpace->condSet ); + destCond->condSpace = addCondSpace( mergedCondSet ); + + destCond->lowKey = pairIter.s1Tel.lowKey; + destCond->highKey = pairIter.s1Tel.highKey; + destList.append( destCond ); + break; + } + case BreakS1: + /* A dest state cond is being split: hand the iterator a copy for the broken-off piece. */ pairIter.s1Tel.trans = new StateCond( *pairIter.s1Tel.trans ); + break; + + case BreakS2: + break; + } + } + destState->stateCondList.transfer( destList ); +} + +/* A state merge which represents the drawing in of leaving transitions. If + * there is any out data then we duplicate the source state, transfer the out + * data, then merge in the state. The new state will be reaped because it will + * not be given any in transitions. 
*/ +void FsmAp::mergeStatesLeaving( MergeData &md, StateAp *destState, StateAp *srcState ) +{ + if ( !hasOutData( destState ) ) + mergeStates( md, destState, srcState ); + else { + /* Merge srcState into a fresh state, call transferOutData( ssMutable, destState ) on it and embed each of destState's out conditions, then merge that state into destState. */ StateAp *ssMutable = addState(); + mergeStates( md, ssMutable, srcState ); + transferOutData( ssMutable, destState ); + + for ( ActionSet::Iter cond = destState->outCondSet; cond.lte(); cond++ ) + embedCondition( md, ssMutable, *cond ); + + mergeStates( md, destState, ssMutable ); + } +} + +/* Merge each of the numSrc states in srcStates into destState, in array order. */ void FsmAp::mergeStates( MergeData &md, StateAp *destState, + StateAp **srcStates, int numSrc ) +{ + for ( int s = 0; s < numSrc; s++ ) + mergeStates( md, destState, srcStates[s] ); +} + +/* Merge srcState into destState. Expansions are collected in both directions first; then the state conds are merged, src's out transitions are copied in, the expansions are applied, the ranges they replace are removed, and finally state bits, final status, epsilons, out priorities and the action tables are drawn in. */ void FsmAp::mergeStates( MergeData &md, StateAp *destState, StateAp *srcState ) +{ + ExpansionList expList1; + ExpansionList expList2; + + /* expList1: dest against src. expList2: src against dest. */ findTransExpansions( expList1, destState, srcState ); + findCondExpansions( expList1, destState, srcState ); + findTransExpansions( expList2, srcState, destState ); + findCondExpansions( expList2, srcState, destState ); + + mergeStateConds( destState, srcState ); + + outTransCopy( md, destState, srcState->outList.head ); + + doExpand( md, destState, expList1 ); + doExpand( md, destState, expList2 ); + + doRemove( md, destState, expList1 ); + doRemove( md, destState, expList2 ); + + expList1.empty(); + expList2.empty(); + + /* Get its bits and final state status. The final bit is masked out of the OR and applied through setFinState instead. */ + destState->stateBits |= ( srcState->stateBits & ~SB_ISFINAL ); + if ( srcState->isFinState() ) + setFinState( destState ); + + /* Draw in any properties of srcState into destState. */ + if ( srcState == destState ) { + /* Duplicate the list to protect against write to source. The + * priorities sets are not copied in because that would have no + * effect. */ + destState->epsilonTrans.append( EpsilonTrans( srcState->epsilonTrans ) ); + + /* Get all actions, duplicating to protect against write to source. 
*/ + destState->toStateActionTable.setActions( + ActionTable( srcState->toStateActionTable ) ); + destState->fromStateActionTable.setActions( + ActionTable( srcState->fromStateActionTable ) ); + destState->outActionTable.setActions( ActionTable( srcState->outActionTable ) ); + destState->outCondSet.insert( ActionSet( srcState->outCondSet ) ); + destState->errActionTable.setActions( ErrActionTable( srcState->errActionTable ) ); + destState->eofActionTable.setActions( ActionTable( srcState->eofActionTable ) ); + } + else { + /* Get the epsilons, out priorities. */ + destState->epsilonTrans.append( srcState->epsilonTrans ); + destState->outPriorTable.setPriors( srcState->outPriorTable ); + + /* Get all actions. */ + destState->toStateActionTable.setActions( srcState->toStateActionTable ); + destState->fromStateActionTable.setActions( srcState->fromStateActionTable ); + destState->outActionTable.setActions( srcState->outActionTable ); + destState->outCondSet.insert( srcState->outCondSet ); + destState->errActionTable.setActions( srcState->errActionTable ); + destState->eofActionTable.setActions( srcState->eofActionTable ); + } +} + +/* Process the fill list in md: merge into each listed state the set of states recorded in its stateDictEl, then delete and clear the stateDictEl of every state on the list. */ void FsmAp::fillInStates( MergeData &md ) +{ + /* Merge any states that are awaiting merging. This will likely cause + * other states to be added to the stfill list. */ + StateAp *state = md.stfillHead; + while ( state != 0 ) { + StateSet *stateSet = &state->stateDictEl->stateSet; + mergeStates( md, state, stateSet->data, stateSet->length() ); + state = state->alg.next; + } + + /* Delete the state sets of all states that are on the fill list. */ + state = md.stfillHead; + while ( state != 0 ) { + /* Delete and reset the state set. */ + delete state->stateDictEl; + state->stateDictEl = 0; + + /* Next state in the stfill list. */ + state = state->alg.next; + } + + /* StateDict will still have its ptrs/size set but all of its elements + * will be deleted so we don't need to clean it up. 
*/ +} + +/* Find the expansions needed to embed condAction in destState. Walks destState's out transitions against its own state cond list: a transition range with no state cond gets a new StateCond over the one-item cond set holding condAction; a range reported only in the state cond list gets condAction added to its cond set. The rebuilt list replaces destState's state cond list. */ void FsmAp::findEmbedExpansions( ExpansionList &expansionList, + StateAp *destState, Action *condAction ) +{ + StateCondList destList; + PairIter<TransAp, StateCond> transCond( destState->outList.head, + destState->stateCondList.head ); + for ( ; !transCond.end(); transCond++ ) { + switch ( transCond.userState ) { + case RangeInS1: { + /* Only ranges that start at or below keyOps->maxKey are handled; presumably ranges above it already lie in a cond space - confirm. */ if ( transCond.s1Tel.lowKey <= keyOps->maxKey ) { + assert( transCond.s1Tel.highKey <= keyOps->maxKey ); + + /* Make a new state cond. */ + StateCond *newStateCond = new StateCond( transCond.s1Tel.lowKey, + transCond.s1Tel.highKey ); + newStateCond->condSpace = addCondSpace( CondSet( condAction ) ); + destList.append( newStateCond ); + + /* Create the expansion. It has no from cond space and a single target value, 1. */ + Expansion *expansion = new Expansion( transCond.s1Tel.lowKey, + transCond.s1Tel.highKey ); + expansion->fromTrans = new TransAp(*transCond.s1Tel.trans); + expansion->fromTrans->fromState = 0; + expansion->fromTrans->toState = transCond.s1Tel.trans->toState; + expansion->fromCondSpace = 0; + expansion->fromVals = 0; + expansion->toCondSpace = newStateCond->condSpace; + expansion->toValsList.append( 1 ); + #ifdef LOG_CONDS + logNewExpansion( expansion ); + #endif + expansionList.append( expansion ); + } + break; + } + case RangeInS2: { + /* Enhance state cond and find the expansion. */ + StateCond *stateCond = transCond.s2Tel.trans; + stateCond->lowKey = transCond.s2Tel.lowKey; + stateCond->highKey = transCond.s2Tel.highKey; + + /* destCS and fromCondSpace are taken from the cond space held before it is replaced below. */ CondSet &destCS = stateCond->condSpace->condSet; + long destLen = destCS.length(); + CondSpace *fromCondSpace = stateCond->condSpace; + + CondSet mergedCS = destCS; + mergedCS.insert( condAction ); + CondSpace *toCondSpace = addCondSpace( mergedCS ); + stateCond->condSpace = toCondSpace; + destList.append( stateCond ); + + /* Loop all values in the dest space. 
*/ + for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) { + /* Move each set bit of destVals from its position in destCS to the position of the same action in mergedCS. */ long basicVals = 0; + for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) { + if ( destVals & (1 << csi.pos()) ) { + Action **cim = mergedCS.find( *csi ); + long bitPos = (cim - mergedCS.data); + basicVals |= 1 << bitPos; + } + } + + /* Also set the bit at condAction's position in mergedCS. */ long targVals = basicVals; + Action **cim = mergedCS.find( condAction ); + long bitPos = (cim - mergedCS.data); + targVals |= 1 << bitPos; + + /* NOTE(review): relies on LongVect having a constructor that takes a single value - confirm it yields a one-element vector. */ LongVect expandToVals( targVals ); + findCondExpInTrans( expansionList, destState, + transCond.s2Tel.lowKey, transCond.s2Tel.highKey, + fromCondSpace, toCondSpace, destVals, expandToVals ); + } + break; + } + + + /* Overlaps and breaks between the two lists are treated as impossible here. */ case RangeOverlap: + case BreakS1: + case BreakS2: + assert( false ); + break; + } + } + + destState->stateCondList.transfer( destList ); +} + +/* Top-level entry: embed condAction in state using a local MergeData, with misfit accounting switched on around the work. NOTE(review): the local expList is never used in this overload. */ void FsmAp::embedCondition( StateAp *state, Action *condAction ) +{ + MergeData md; + ExpansionList expList; + + /* Turn on misfit accounting to possibly catch the old start state. */ + setMisfitAccounting( true ); + + /* Worker. */ + embedCondition( md, state, condAction ); + + /* Fill in any states that were newed up as combinations of others. */ + fillInStates( md ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Worker: find the expansions needed to embed condAction in state, apply them, remove the ranges they replace, then empty the list. */ void FsmAp::embedCondition( MergeData &md, StateAp *state, Action *condAction ) +{ + ExpansionList expList; + + findEmbedExpansions( expList, state, condAction ); + doExpand( md, state, expList ); + doRemove( md, state, expList ); + expList.empty(); +} diff --git a/ragel/fsmgraph.h b/ragel/fsmgraph.h new file mode 100644 index 0000000..1a8e80c --- /dev/null +++ b/ragel/fsmgraph.h @@ -0,0 +1,1369 @@ +/* + * Copyright 2001-2004 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. 
+ * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _FSMGRAPH_H +#define _FSMGRAPH_H + +#include <assert.h> +#include "common.h" +#include "vector.h" +#include "bstset.h" +#include "compare.h" +#include "avltree.h" +#include "dlist.h" +#include "bstmap.h" +#include "sbstmap.h" +#include "sbstset.h" +#include "sbsttable.h" +#include "avlset.h" +#include "avlmap.h" + +//#define LOG_CONDS + +/* Flags that control merging. These are the bit values stored in StateAp::stateBits; SB_BOTH is SB_GRAPH1 | SB_GRAPH2. */ +#define SB_GRAPH1 0x01 +#define SB_GRAPH2 0x02 +#define SB_BOTH 0x03 +#define SB_ISFINAL 0x04 +#define SB_ISMARKED 0x08 + +struct TransAp; +struct StateAp; +struct FsmAp; +struct Action; +struct LongestMatchPart; + +/* State list element for unambiguous access to list element. */ +struct FsmListEl +{ + StateAp *prev, *next; +}; + +/* This is the marked index for a state pair. Used in minimization. It keeps + * track of whether or not the state pair is marked. */ +struct MarkIndex +{ + MarkIndex(int states); + ~MarkIndex(); + + void markPair(int state1, int state2); + bool isPairMarked(int state1, int state2); + +private: + int numStates; + bool *array; +}; + +extern KeyOps *keyOps; + +/* Transition Action Element: maps an int ordering to an Action. */ +typedef SBstMapEl< int, Action* > ActionTableEl; + +/* Transition Action Table. 
*/ +struct ActionTable + : public SBstMap< int, Action*, CmpOrd<int> > +{ + void setAction( int ordering, Action *action ); + void setActions( int *orderings, Action **actions, int nActs ); + void setActions( const ActionTable &other ); + + bool hasAction( Action *action ); +}; + +/* Set of actions, and the comparison for such sets. */ typedef SBstSet< Action*, CmpOrd<Action*> > ActionSet; +typedef CmpSTable< Action*, CmpOrd<Action*> > CmpActionSet; + +/* Longest-match action element: maps an int ordering to a LongestMatchPart. */ +typedef SBstMapEl< int, LongestMatchPart* > LmActionTableEl; + +/* Longest-match action table, keyed by ordering. */ +struct LmActionTable + : public SBstMap< int, LongestMatchPart*, CmpOrd<int> > +{ + void setAction( int ordering, LongestMatchPart *action ); + void setActions( const LmActionTable &other ); +}; + +/* Compare of a whole action table element (key & value). Orders by key first, then by the value pointer. */ +struct CmpActionTableEl +{ + static int compare( const ActionTableEl &action1, + const ActionTableEl &action2 ) + { + if ( action1.key < action2.key ) + return -1; + else if ( action1.key > action2.key ) + return 1; + else if ( action1.value < action2.value ) + return -1; + else if ( action1.value > action2.value ) + return 1; + return 0; + } +}; + +/* Compare for ActionTable. */ +typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable; + +/* Compare of a whole lm action table element (key & value). Orders by key first, then by the value pointer. */ +struct CmpLmActionTableEl +{ + static int compare( const LmActionTableEl &lmAction1, + const LmActionTableEl &lmAction2 ) + { + if ( lmAction1.key < lmAction2.key ) + return -1; + else if ( lmAction1.key > lmAction2.key ) + return 1; + else if ( lmAction1.value < lmAction2.value ) + return -1; + else if ( lmAction1.value > lmAction2.value ) + return 1; + return 0; + } +}; + +/* Compare for LmActionTable. */ +typedef CmpSTable< LmActionTableEl, CmpLmActionTableEl > CmpLmActionTable; + +/* Action table element for error action tables. Adds the encoding of transfer + * point. 
*/ +struct ErrActionTableEl +{ + /* Note the argument order: the action comes first, then the ordering. */ ErrActionTableEl( Action *action, int ordering, int transferPoint ) + : ordering(ordering), action(action), transferPoint(transferPoint) { } + + /* Ordering and id of the action embedding. */ + int ordering; + Action *action; + + /* Id of point of transfer from Error action table to transitions and + * eofActionTable. */ + int transferPoint; + + /* The table key is the ordering. */ int getKey() const { return ordering; } +}; + +/* Error action table: ErrActionTableEl items keyed by their int ordering. */ struct ErrActionTable + : public SBstTable< ErrActionTableEl, int, CmpOrd<int> > +{ + void setAction( int ordering, Action *action, int transferPoint ); + void setActions( const ErrActionTable &other ); +}; + +/* Compare of an error action table element (key & value). Orders by ordering, then action pointer, then transferPoint. */ +struct CmpErrActionTableEl +{ + static int compare( const ErrActionTableEl &action1, + const ErrActionTableEl &action2 ) + { + if ( action1.ordering < action2.ordering ) + return -1; + else if ( action1.ordering > action2.ordering ) + return 1; + else if ( action1.action < action2.action ) + return -1; + else if ( action1.action > action2.action ) + return 1; + else if ( action1.transferPoint < action2.transferPoint ) + return -1; + else if ( action1.transferPoint > action2.transferPoint ) + return 1; + return 0; + } +}; + +/* Compare for ErrActionTable. */ +typedef CmpSTable< ErrActionTableEl, CmpErrActionTableEl > CmpErrActionTable; + + +/* Describe a priority, shared among PriorEls. + * Has a key and a priority value. */ +struct PriorDesc +{ + int key; + int priority; +}; + +/* Element in the arrays of priorities for transitions and arrays. Ordering is + * unique among instantiations of machines, desc is shared. */ +struct PriorEl +{ + PriorEl( int ordering, PriorDesc *desc ) + : ordering(ordering), desc(desc) { } + + int ordering; + PriorDesc *desc; +}; + +/* Compare priority elements, which are ordered by the priority descriptor + * key. 
*/ +struct PriorElCmp +{ + static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) + { + if ( pel1.desc->key < pel2.desc->key ) + return -1; + else if ( pel1.desc->key > pel2.desc->key ) + return 1; + else + return 0; + } +}; + + +/* Priority Table: a set of PriorEl ordered by PriorElCmp, that is, by descriptor key. */ +struct PriorTable + : public SBstSet< PriorEl, PriorElCmp > +{ + void setPrior( int ordering, PriorDesc *desc ); + void setPriors( const PriorTable &other ); +}; + +/* Compare of prior table elements for distinguishing state data. Orders by the descriptor pointer first, then by ordering. */ +struct CmpPriorEl +{ + static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) + { + if ( pel1.desc < pel2.desc ) + return -1; + else if ( pel1.desc > pel2.desc ) + return 1; + else if ( pel1.ordering < pel2.ordering ) + return -1; + else if ( pel1.ordering > pel2.ordering ) + return 1; + return 0; + } +}; + +/* Compare of PriorTable distinguishing state data. Using a compare of the + * pointers is a little more strict than it needs be. It requires that + * priority tables have the exact same set of priority assignment operators + * (from the input lang) to be considered equal. + * + * Really only key-value pairs need be tested and ordering be merged. However + * this would require that in the fusing of states, priority descriptors be + * chosen for the new fused state based on priority. Since the out transition + * lists and ranges aren't necessarily going to line up, this is more work for + * little gain. Final compression resets all priorities first, so this would + * only be useful for compression at every operator, which is only an + * undocumented test feature. + */ +typedef CmpSTable<PriorEl, CmpPriorEl> CmpPriorTable; + +/* Plain action list that imposes no ordering. */ +typedef Vector<int> TransFuncList; + +/* Comparison for TransFuncList. */ +typedef CmpTable< int, CmpOrd<int> > TransFuncListCompare; + +/* Transition class that implements actions and priorities. 
*/ +struct TransAp +{ + /* Neither constructor initializes the list pointers prev, next, ilprev and ilnext. */ TransAp() : fromState(0), toState(0) {} + /* Copy the keys, action table and priority table only. fromState and toState start out null and the longest-match action table is not copied; the assert requires it to be empty in both objects. */ TransAp( const TransAp &other ) : + lowKey(other.lowKey), + highKey(other.highKey), + fromState(0), toState(0), + actionTable(other.actionTable), + priorTable(other.priorTable) + { + assert( lmActionTable.length() == 0 && other.lmActionTable.length() == 0 ); + } + + Key lowKey, highKey; + StateAp *fromState; + StateAp *toState; + + /* Pointers for outlist. */ + TransAp *prev, *next; + + /* Pointers for in-list. */ + TransAp *ilprev, *ilnext; + + /* The function table and priority for the transition. */ + ActionTable actionTable; + PriorTable priorTable; + + LmActionTable lmActionTable; +}; + +/* In transition list. Like DList except only has head pointers, which is all + * that is required. Insertion and deletion is handled by the graph. This + * class provides the iterator of a single list. */ +struct TransInList +{ + TransInList() : head(0) { } + + TransAp *head; + + struct Iter + { + /* Default construct. */ + Iter() : ptr(0) { } + + /* Construct, assign from a list. */ + Iter( const TransInList &il ) : ptr(il.head) { } + Iter &operator=( const TransInList &dl ) { ptr = dl.head; return *this; } + + /* lte(): still on an element. end(): past the last element. */ + bool lte() const { return ptr != 0; } + bool end() const { return ptr == 0; } + + /* At the first, last element. */ + bool first() const { return ptr && ptr->ilprev == 0; } + bool last() const { return ptr && ptr->ilnext == 0; } + + /* Cast, dereference, arrow ops. */ + operator TransAp*() const { return ptr; } + TransAp &operator *() const { return *ptr; } + TransAp *operator->() const { return ptr; } + + /* Increment, decrement. These follow the in-list links and do not check for a null ptr. */ + inline void operator++(int) { ptr = ptr->ilnext; } + inline void operator--(int) { ptr = ptr->ilprev; } + + /* The iterator is simply a pointer. */ + TransAp *ptr; + }; +}; + +/* Out transition list. */ typedef DList<TransAp> TransList; + +/* Set of states, list of states. */ +typedef BstSet<StateAp*> StateSet; +typedef DList<StateAp> StateList; + +/* A element in a state dict. 
*/ +struct StateDictEl +: + public AvlTreeEl<StateDictEl> +{ + /* The constructor copies the state set; targState is left unset. */ StateDictEl(const StateSet &stateSet) + : stateSet(stateSet) { } + + const StateSet &getKey() { return stateSet; } + StateSet stateSet; + StateAp *targState; +}; + +/* Dictionary mapping a set of states to a target state. */ +typedef AvlTree< StateDictEl, StateSet, CmpTable<StateAp*> > StateDict; + +/* Data needed for a merge operation. */ +struct MergeData +{ + MergeData() + : stfillHead(0), stfillTail(0) { } + + StateDict stateDict; + + /* Head and tail of the list of states awaiting fill-in; FsmAp::fillInStates walks it from stfillHead through StateAp::alg.next. */ StateAp *stfillHead; + StateAp *stfillTail; + + void fillListAppend( StateAp *state ); +}; + +/* A key range with an associated transition pointer. */ struct TransEl +{ + /* Constructors. The default constructor initializes nothing and the two-key form leaves value unset. */ + TransEl() { } + TransEl( Key lowKey, Key highKey ) + : lowKey(lowKey), highKey(highKey) { } + TransEl( Key lowKey, Key highKey, TransAp *value ) + : lowKey(lowKey), highKey(highKey), value(value) { } + + Key lowKey, highKey; + TransAp *value; +}; + +/* Three-way compare of two keys using the Key operators < and >. */ struct CmpKey +{ + static int compare( const Key key1, const Key key2 ) + { + if ( key1 < key2 ) + return -1; + else if ( key1 > key2 ) + return 1; + else + return 0; + } +}; + +/* Vector based set of key items. */ +typedef BstSet<Key, CmpKey> KeySet; + +/* A partition of states (see StateAp::alg.partition, used when minimizing by partitioning). */ struct MinPartition +{ + MinPartition() : active(false) { } + + StateList list; + bool active; + + MinPartition *prev, *next; +}; + +/* Epsilon transition stored in a state. Specifies the target. */ +typedef Vector<int> EpsilonTrans; + +/* List of states that are to be drawn into this. */ +struct EptVectEl +{ + EptVectEl( StateAp *targ, bool leaving ) + : targ(targ), leaving(leaving) { } + + StateAp *targ; + bool leaving; +}; +typedef Vector<EptVectEl> EptVect; + +/* Set of entry ids that go into this state. */ +typedef BstSet<int> EntryIdSet; + +/* Set of longest match items that may be active in a given state. */ +typedef BstSet<LongestMatchPart*> LmItemSet; + +/* Conditions. 
*/ +typedef BstSet< Action*, CmpOrd<Action*> > CondSet; +typedef CmpTable< Action*, CmpOrd<Action*> > CmpCondSet; + +/* A cond set together with the base key of its key range and an id. The constructor copies the cond set only; baseKey and condSpaceId are left unset. */ struct CondSpace + : public AvlTreeEl<CondSpace> +{ + CondSpace( const CondSet &condSet ) + : condSet(condSet) {} + + const CondSet &getKey() { return condSet; } + + CondSet condSet; + Key baseKey; + long condSpaceId; +}; + +typedef Vector<CondSpace*> CondSpaceVect; + +/* Cond spaces indexed by their cond set. */ typedef AvlTree<CondSpace, CondSet, CmpCondSet> CondSpaceMap; + +/* A key range of a state and the cond space that applies to it. The constructor sets the keys only; condSpace, prev and next are left unset. */ struct StateCond +{ + StateCond( Key lowKey, Key highKey ) : + lowKey(lowKey), highKey(highKey) {} + + Key lowKey; + Key highKey; + CondSpace *condSpace; + + StateCond *prev, *next; +}; + +typedef DList<StateCond> StateCondList; +typedef Vector<long> LongVect; + +/* Describes moving a transition over [lowKey, highKey] from (fromCondSpace, fromVals) to each value of toValsList in toCondSpace. The Expansion owns fromTrans: the destructor deletes it. The constructor leaves fromVals, prev and next unset. */ struct Expansion +{ + Expansion( Key lowKey, Key highKey ) : + lowKey(lowKey), highKey(highKey), + fromTrans(0), fromCondSpace(0), + toCondSpace(0) {} + + ~Expansion() + { + if ( fromTrans != 0 ) + delete fromTrans; + } + + Key lowKey; + Key highKey; + + TransAp *fromTrans; + CondSpace *fromCondSpace; + long fromVals; + + CondSpace *toCondSpace; + LongVect toValsList; + + Expansion *prev, *next; +}; + +typedef DList<Expansion> ExpansionList; + +/* A key range to remove; FsmAp::doRemove passes a single Removal as the second list of a PairIter. */ struct Removal +{ + Key lowKey; + Key highKey; + + Removal *next; +}; + +struct CondData +{ + CondData() : nextCondKey(0) {} + + /* Condition info. */ + Key nextCondKey; + + CondSpaceMap condSpaceMap; +}; + +extern CondData *condData; + +/* State class that implements actions and priorities. */ +struct StateAp +{ + StateAp(); + StateAp(const StateAp &other); + ~StateAp(); + + /* Is the state final? */ + bool isFinState() { return stateBits & SB_ISFINAL; } + + /* Out transition list. */ + TransList outList; + + /* In transition Lists. */ + TransInList inList; + + /* Entry points into the state. */ + EntryIdSet entryIds; + + /* Epsilon transitions. */ + EpsilonTrans epsilonTrans; + + /* Condition info. 
*/ + StateCondList stateCondList; + + /* Number of in transitions from states other than ourselves. */ + int foreignInTrans; + + /* Temporary data for various algorithms. The members share storage, so only one is meaningful at a time. */ + union { + /* When duplicating the fsm we need to map each + * state to the new state representing it. */ + StateAp *stateMap; + + /* When minimizing machines by partitioning, this maps to the group + * the state is in. */ + MinPartition *partition; + + /* When merging states (state machine operations) this next pointer is + * used for the list of states that need to be filled in. */ + StateAp *next; + + /* Identification for printing and stable minimization. */ + int stateNum; + + } alg; + + /* Data used in epsilon operation, maybe fit into alg? */ + StateAp *isolatedShadow; + int owningGraph; + + /* A pointer to a dict element that contains the set of states this state + * represents. This cannot go into alg, because alg.next is used during + * the merging process. */ + StateDictEl *stateDictEl; + + /* When drawing epsilon transitions, holds the list of states to merge + * with. */ + EptVect *eptVect; + + /* Bits controlling the behaviour of the state during collapsing to dfa. */ + int stateBits; + + /* State list elements. */ + StateAp *next, *prev; + + /* + * Priority and Action data. + */ + + /* Out priorities transferred to out transitions. */ + PriorTable outPriorTable; + + /* The following two action tables are distinguished by when they run: + * toState actions are executed immediately after transition actions of + * incoming transitions and the current character will be the same as the + * one available then. The fromState actions are executed immediately + * before the transition actions of outgoing transitions and the current + * character is the same as the one available then. */ + + /* Actions to execute upon entering into a state. */ + ActionTable toStateActionTable; + + /* Actions to execute when going from the state to the transition. 
*/ + ActionTable fromStateActionTable; + + /* Actions to add to any future transitions that leave via this state. */ + ActionTable outActionTable; + + /* Conditions to add to any future transitions that leave via this state. */ + ActionSet outCondSet; + + /* Error action tables. */ + ErrActionTable errActionTable; + + /* Actions to execute on eof. */ + ActionTable eofActionTable; + + /* Set of longest match items that may be active in this state. */ + LmItemSet lmItemSet; +}; + +/* Cursor over a linked list of range items: trans is the current item, next its successor, and lowKey/highKey a copy of the current item's keys. */ template <class ListItem> struct NextTrans +{ + Key lowKey, highKey; + ListItem *trans; + ListItem *next; + + /* Refresh next and the key copy from trans. With a null trans only next is cleared; the keys are left as they were. */ void load() { + if ( trans == 0 ) + next = 0; + else { + next = trans->next; + lowKey = trans->lowKey; + highKey = trans->highKey; + } + } + + void set( ListItem *t ) { + trans = t; + load(); + } + + /* Advance using the saved next pointer rather than trans->next. */ void increment() { + trans = next; + load(); + } +}; + + +/* Encodes the different states that are meaningful to the user of the iterator. */ +enum PairIterUserState +{ + RangeInS1, RangeInS2, + RangeOverlap, + BreakS1, BreakS2 +}; + +/* Walks two key-ordered range lists side by side, reporting through userState each range that is only in list 1, only in list 2 or in both, and each point where an item of list 1 or list 2 is about to be broken. */ template <class ListItem1, class ListItem2 = ListItem1> struct PairIter +{ + /* Encodes the different states that an fsm iterator can be in. */ + enum IterState { + Begin, + ConsumeS1Range, ConsumeS2Range, + OnlyInS1Range, OnlyInS2Range, + S1SticksOut, S1SticksOutBreak, + S2SticksOut, S2SticksOutBreak, + S1DragsBehind, S1DragsBehindBreak, + S2DragsBehind, S2DragsBehindBreak, + ExactOverlap, End + }; + + PairIter( ListItem1 *list1, ListItem2 *list2 ); + + /* Query iterator. */ + bool lte() { return itState != End; } + bool end() { return itState == End; } + void operator++(int) { findNext(); } + void operator++() { findNext(); } + + /* Iterator state. 
*/ + ListItem1 *list1; + ListItem2 *list2; + IterState itState; + PairIterUserState userState; + + /* Cursors into list 1 and list 2, read by callers after each step. */ NextTrans<ListItem1> s1Tel; + NextTrans<ListItem2> s2Tel; + /* Saved remainder of a range while its front part is being reported. */ Key bottomLow, bottomHigh; + ListItem1 *bottomTrans1; + ListItem2 *bottomTrans2; + +private: + void findNext(); +}; + +/* Init the iterator by advancing to the first item. */ +template <class ListItem1, class ListItem2> PairIter<ListItem1, ListItem2>::PairIter( + ListItem1 *list1, ListItem2 *list2 ) +: + list1(list1), + list2(list2), + itState(Begin) +{ + findNext(); +} + +/* Return and re-entry for the co-routine iterators. Records label in itState, returns, and defines the entry<label> goto target used to resume. This should ALWAYS be + * used inside of a block. */ +#define CO_RETURN(label) \ + itState = label; \ + return; \ + entry##label: backIn = true + +/* Same as CO_RETURN, but also stores uState in userState before returning. This should ALWAYS be + * used inside of a block. */ +#define CO_RETURN2(label, uState) \ + itState = label; \ + userState = uState; \ + return; \ + entry##label: backIn = true + +/* Advance to the next transition. When it returns, trans points to the next + * transition, unless there are no more, in which case end() returns true. */ +template <class ListItem1, class ListItem2> void PairIter<ListItem1, ListItem2>::findNext() +{ + /* This variable is used in dummy statements that follow the entry + * goto labels. The compiler needs some statement to follow the label. */ + bool backIn; + + /* Jump into the iterator routine based on the iterator state. 
*/ + switch ( itState ) { + case Begin: goto entryBegin; + case ConsumeS1Range: goto entryConsumeS1Range; + case ConsumeS2Range: goto entryConsumeS2Range; + case OnlyInS1Range: goto entryOnlyInS1Range; + case OnlyInS2Range: goto entryOnlyInS2Range; + case S1SticksOut: goto entryS1SticksOut; + case S1SticksOutBreak: goto entryS1SticksOutBreak; + case S2SticksOut: goto entryS2SticksOut; + case S2SticksOutBreak: goto entryS2SticksOutBreak; + case S1DragsBehind: goto entryS1DragsBehind; + case S1DragsBehindBreak: goto entryS1DragsBehindBreak; + case S2DragsBehind: goto entryS2DragsBehind; + case S2DragsBehindBreak: goto entryS2DragsBehindBreak; + case ExactOverlap: goto entryExactOverlap; + case End: goto entryEnd; + } + +entryBegin: + /* Set up the next structs at the head of the transition lists. */ + s1Tel.set( list1 ); + s2Tel.set( list2 ); + + /* Concurrently scan both out ranges. */ + while ( true ) { + if ( s1Tel.trans == 0 ) { + /* We are at the end of state1's ranges. Process the rest of + * state2's ranges. */ + while ( s2Tel.trans != 0 ) { + /* Range is only in s2. */ + CO_RETURN2( ConsumeS2Range, RangeInS2 ); + s2Tel.increment(); + } + break; + } + else if ( s2Tel.trans == 0 ) { + /* We are at the end of state2's ranges. Process the rest of + * state1's ranges. */ + while ( s1Tel.trans != 0 ) { + /* Range is only in s1. */ + CO_RETURN2( ConsumeS1Range, RangeInS1 ); + s1Tel.increment(); + } + break; + } + /* Both state1's and state2's transition elements are good. + * The signiture of no overlap is a back key being in front of a + * front key. */ + else if ( s1Tel.highKey < s2Tel.lowKey ) { + /* A range exists in state1 that does not overlap with state2. */ + CO_RETURN2( OnlyInS1Range, RangeInS1 ); + s1Tel.increment(); + } + else if ( s2Tel.highKey < s1Tel.lowKey ) { + /* A range exists in state2 that does not overlap with state1. */ + CO_RETURN2( OnlyInS2Range, RangeInS2 ); + s2Tel.increment(); + } + /* There is overlap, must mix the ranges in some way. 
*/ + else if ( s1Tel.lowKey < s2Tel.lowKey ) { + /* Range from state1 sticks out front. Must break it into + * non-overlaping and overlaping segments. */ + bottomLow = s2Tel.lowKey; + bottomHigh = s1Tel.highKey; + s1Tel.highKey = s2Tel.lowKey; + s1Tel.highKey.decrement(); + bottomTrans1 = s1Tel.trans; + + /* Notify the caller that we are breaking s1. This gives them a + * chance to duplicate s1Tel[0,1].value. */ + CO_RETURN2( S1SticksOutBreak, BreakS1 ); + + /* Broken off range is only in s1. */ + CO_RETURN2( S1SticksOut, RangeInS1 ); + + /* Advance over the part sticking out front. */ + s1Tel.lowKey = bottomLow; + s1Tel.highKey = bottomHigh; + s1Tel.trans = bottomTrans1; + } + else if ( s2Tel.lowKey < s1Tel.lowKey ) { + /* Range from state2 sticks out front. Must break it into + * non-overlaping and overlaping segments. */ + bottomLow = s1Tel.lowKey; + bottomHigh = s2Tel.highKey; + s2Tel.highKey = s1Tel.lowKey; + s2Tel.highKey.decrement(); + bottomTrans2 = s2Tel.trans; + + /* Notify the caller that we are breaking s2. This gives them a + * chance to duplicate s2Tel[0,1].value. */ + CO_RETURN2( S2SticksOutBreak, BreakS2 ); + + /* Broken off range is only in s2. */ + CO_RETURN2( S2SticksOut, RangeInS2 ); + + /* Advance over the part sticking out front. */ + s2Tel.lowKey = bottomLow; + s2Tel.highKey = bottomHigh; + s2Tel.trans = bottomTrans2; + } + /* Low ends are even. Are the high ends even? */ + else if ( s1Tel.highKey < s2Tel.highKey ) { + /* Range from state2 goes longer than the range from state1. We + * must break the range from state2 into an evenly overlaping + * segment. */ + bottomLow = s1Tel.highKey; + bottomLow.increment(); + bottomHigh = s2Tel.highKey; + s2Tel.highKey = s1Tel.highKey; + bottomTrans2 = s2Tel.trans; + + /* Notify the caller that we are breaking s2. This gives them a + * chance to duplicate s2Tel[0,1].value. */ + CO_RETURN2( S2DragsBehindBreak, BreakS2 ); + + /* Breaking s2 produces exact overlap. 
*/ + CO_RETURN2( S2DragsBehind, RangeOverlap ); + + /* Advance over the front we just broke off of range 2. */ + s2Tel.lowKey = bottomLow; + s2Tel.highKey = bottomHigh; + s2Tel.trans = bottomTrans2; + + /* Advance over the entire s1Tel. We have consumed it. */ + s1Tel.increment(); + } + else if ( s2Tel.highKey < s1Tel.highKey ) { + /* Range from state1 goes longer than the range from state2. We + * must break the range from state1 into an evenly overlaping + * segment. */ + bottomLow = s2Tel.highKey; + bottomLow.increment(); + bottomHigh = s1Tel.highKey; + s1Tel.highKey = s2Tel.highKey; + bottomTrans1 = s1Tel.trans; + + /* Notify the caller that we are breaking s1. This gives them a + * chance to duplicate s2Tel[0,1].value. */ + CO_RETURN2( S1DragsBehindBreak, BreakS1 ); + + /* Breaking s1 produces exact overlap. */ + CO_RETURN2( S1DragsBehind, RangeOverlap ); + + /* Advance over the front we just broke off of range 1. */ + s1Tel.lowKey = bottomLow; + s1Tel.highKey = bottomHigh; + s1Tel.trans = bottomTrans1; + + /* Advance over the entire s2Tel. We have consumed it. */ + s2Tel.increment(); + } + else { + /* There is an exact overlap. */ + CO_RETURN2( ExactOverlap, RangeOverlap ); + + s1Tel.increment(); + s2Tel.increment(); + } + } + + /* Done, go into end state. */ + CO_RETURN( End ); +} + + +/* Compare lists of epsilon transitions. Entries are name ids of targets. */ +typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans; + +/* Compare class for the Approximate minimization. */ +class ApproxCompare +{ +public: + ApproxCompare() { } + int compare( const StateAp *pState1, const StateAp *pState2 ); +}; + +/* Compare class for the initial partitioning of a partition minimization. */ +class InitPartitionCompare +{ +public: + InitPartitionCompare() { } + int compare( const StateAp *pState1, const StateAp *pState2 ); +}; + +/* Compare class for the regular partitioning of a partition minimization. 
*/ +class PartitionCompare +{ +public: + PartitionCompare() { } + int compare( const StateAp *pState1, const StateAp *pState2 ); +}; + +/* Compare class for a minimization that marks pairs. Provides the shouldMark + * routine. */ +class MarkCompare +{ +public: + MarkCompare() { } + bool shouldMark( MarkIndex &markIndex, const StateAp *pState1, + const StateAp *pState2 ); +}; + +/* List of partitions. */ +typedef DList< MinPartition > PartitionList; + +/* List of transtions out of a state. */ +typedef Vector<TransEl> TransListVect; + +/* Entry point map used for keeping track of entry points in a machine. */ +typedef BstSet< int > EntryIdSet; +typedef BstMapEl< int, StateAp* > EntryMapEl; +typedef BstMap< int, StateAp* > EntryMap; +typedef Vector<EntryMapEl> EntryMapBase; + +/* Graph class that implements actions and priorities. */ +struct FsmAp +{ + /* Constructors/Destructors. */ + FsmAp( ); + FsmAp( const FsmAp &graph ); + ~FsmAp(); + + /* The list of states. */ + StateList stateList; + StateList misfitList; + + /* The map of entry points. */ + EntryMap entryPoints; + + /* The start state. */ + StateAp *startState; + + /* The set of final states. */ + StateSet finStateSet; + + /* Misfit Accounting. Are misfits put on a separate list. */ + bool misfitAccounting; + + /* + * Transition actions and priorities. + */ + + /* Set priorities on transtions. */ + void startFsmPrior( int ordering, PriorDesc *prior ); + void allTransPrior( int ordering, PriorDesc *prior ); + void finishFsmPrior( int ordering, PriorDesc *prior ); + void leaveFsmPrior( int ordering, PriorDesc *prior ); + + /* Action setting support. */ + void transferErrorActions( StateAp *state, int transferPoint ); + void setErrorAction( StateAp *state, int ordering, Action *action ); + + /* Fill all spaces in a transition list with an error transition. */ + void fillGaps( StateAp *state ); + + /* Similar to setErrorAction, instead gives a state to go to on error. 
*/ + void setErrorTarget( StateAp *state, StateAp *target, int *orderings, + Action **actions, int nActs ); + + /* Set actions to execute. */ + void startFsmAction( int ordering, Action *action ); + void allTransAction( int ordering, Action *action ); + void finishFsmAction( int ordering, Action *action ); + void leaveFsmAction( int ordering, Action *action ); + void longMatchAction( int ordering, LongestMatchPart *lmPart ); + + /* Set conditions. */ + CondSpace *addCondSpace( const CondSet &condSet ); + + void findEmbedExpansions( ExpansionList &expansionList, + StateAp *destState, Action *condAction ); + void embedCondition( MergeData &md, StateAp *state, Action *condAction ); + void embedCondition( StateAp *state, Action *condAction ); + + void startFsmCondition( Action *condAction ); + void allTransCondition( Action *condAction ); + void leaveFsmCondition( Action *condAction ); + + /* Set error actions to execute. */ + void startErrorAction( int ordering, Action *action, int transferPoint ); + void allErrorAction( int ordering, Action *action, int transferPoint ); + void finalErrorAction( int ordering, Action *action, int transferPoint ); + void notStartErrorAction( int ordering, Action *action, int transferPoint ); + void notFinalErrorAction( int ordering, Action *action, int transferPoint ); + void middleErrorAction( int ordering, Action *action, int transferPoint ); + + /* Set EOF actions. */ + void startEOFAction( int ordering, Action *action ); + void allEOFAction( int ordering, Action *action ); + void finalEOFAction( int ordering, Action *action ); + void notStartEOFAction( int ordering, Action *action ); + void notFinalEOFAction( int ordering, Action *action ); + void middleEOFAction( int ordering, Action *action ); + + /* Set To State actions. 
*/ + void startToStateAction( int ordering, Action *action ); + void allToStateAction( int ordering, Action *action ); + void finalToStateAction( int ordering, Action *action ); + void notStartToStateAction( int ordering, Action *action ); + void notFinalToStateAction( int ordering, Action *action ); + void middleToStateAction( int ordering, Action *action ); + + /* Set From State actions. */ + void startFromStateAction( int ordering, Action *action ); + void allFromStateAction( int ordering, Action *action ); + void finalFromStateAction( int ordering, Action *action ); + void notStartFromStateAction( int ordering, Action *action ); + void notFinalFromStateAction( int ordering, Action *action ); + void middleFromStateAction( int ordering, Action *action ); + + /* Shift the action ordering of the start transitions to start at + * fromOrder and increase in units of 1. Useful before kleene star + * operation. */ + int shiftStartActionOrder( int fromOrder ); + + /* Clear all priorities from the fsm to so they won't affcet minimization + * of the final fsm. */ + void clearAllPriorities(); + + /* Zero out all the function keys. */ + void nullActionKeys(); + + /* Walk the list of states and verify state properties. */ + void verifyStates(); + + /* Misfit Accounting. Are misfits put on a separate list. */ + void setMisfitAccounting( bool val ) + { misfitAccounting = val; } + + /* Set and Unset a state as final. */ + void setFinState( StateAp *state ); + void unsetFinState( StateAp *state ); + + void setStartState( StateAp *state ); + void unsetStartState( ); + + /* Set and unset a state as an entry point. */ + void setEntry( int id, StateAp *state ); + void changeEntry( int id, StateAp *to, StateAp *from ); + void unsetEntry( int id, StateAp *state ); + void unsetEntry( int id ); + void unsetAllEntryPoints(); + + /* Epsilon transitions. */ + void epsilonTrans( int id ); + void shadowReadWriteStates( MergeData &md ); + + /* + * Basic attaching and detaching. 
+ */ + + /* Common to attaching/detaching list and default. */ + void attachToInList( StateAp *from, StateAp *to, TransAp *&head, TransAp *trans ); + void detachFromInList( StateAp *from, StateAp *to, TransAp *&head, TransAp *trans ); + + /* Attach with a new transition. */ + TransAp *attachNewTrans( StateAp *from, StateAp *to, + Key onChar1, Key onChar2 ); + + /* Attach with an existing transition that already in an out list. */ + void attachTrans( StateAp *from, StateAp *to, TransAp *trans ); + + /* Redirect a transition away from error and towards some state. */ + void redirectErrorTrans( StateAp *from, StateAp *to, TransAp *trans ); + + /* Detach a transition from a target state. */ + void detachTrans( StateAp *from, StateAp *to, TransAp *trans ); + + /* Detach a state from the graph. */ + void detachState( StateAp *state ); + + /* + * NFA to DFA conversion routines. + */ + + /* Duplicate a transition that will dropin to a free spot. */ + TransAp *dupTrans( StateAp *from, TransAp *srcTrans ); + + /* In crossing, two transitions both go to real states. */ + TransAp *fsmAttachStates( MergeData &md, StateAp *from, + TransAp *destTrans, TransAp *srcTrans ); + + /* Two transitions are to be crossed, handle the possibility of either + * going to the error state. */ + TransAp *mergeTrans( MergeData &md, StateAp *from, + TransAp *destTrans, TransAp *srcTrans ); + + /* Compare deterimne relative priorities of two transition tables. */ + int comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 ); + + /* Cross a src transition with one that is already occupying a spot. 
*/ + TransAp *crossTransitions( MergeData &md, StateAp *from, + TransAp *destTrans, TransAp *srcTrans ); + + void outTransCopy( MergeData &md, StateAp *dest, TransAp *srcList ); + + void doRemove( MergeData &md, StateAp *destState, ExpansionList &expList1 ); + void doExpand( MergeData &md, StateAp *destState, ExpansionList &expList1 ); + void findCondExpInTrans( ExpansionList &expansionList, StateAp *state, + Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace, + long destVals, LongVect &toValsList ); + void findTransExpansions( ExpansionList &expansionList, + StateAp *destState, StateAp *srcState ); + void findCondExpansions( ExpansionList &expansionList, + StateAp *destState, StateAp *srcState ); + void mergeStateConds( StateAp *destState, StateAp *srcState ); + + /* Merge a set of states into newState. */ + void mergeStates( MergeData &md, StateAp *destState, + StateAp **srcStates, int numSrc ); + void mergeStatesLeaving( MergeData &md, StateAp *destState, StateAp *srcState ); + void mergeStates( MergeData &md, StateAp *destState, StateAp *srcState ); + + /* Make all states that are combinations of other states and that + * have not yet had their out transitions filled in. This will + * empty out stateDict and stFil. */ + void fillInStates( MergeData &md ); + + /* + * Transition Comparison. + */ + + /* Compare transition data. Either of the pointers may be null. */ + static inline int compareDataPtr( TransAp *trans1, TransAp *trans2 ); + + /* Compare target state and transition data. Either pointer may be null. */ + static inline int compareFullPtr( TransAp *trans1, TransAp *trans2 ); + + /* Compare target partitions. Either pointer may be null. */ + static inline int comparePartPtr( TransAp *trans1, TransAp *trans2 ); + + /* Check marked status of target states. Either pointer may be null. */ + static inline bool shouldMarkPtr( MarkIndex &markIndex, + TransAp *trans1, TransAp *trans2 ); + + /* + * Callbacks. 
+ */ + + /* Compare priority and function table of transitions. */ + static int compareTransData( TransAp *trans1, TransAp *trans2 ); + + /* Add in the properties of srcTrans into this. */ + void addInTrans( TransAp *destTrans, TransAp *srcTrans ); + + /* Compare states on data stored in the states. */ + static int compareStateData( const StateAp *state1, const StateAp *state2 ); + + /* Out transition data. */ + void clearOutData( StateAp *state ); + bool hasOutData( StateAp *state ); + void transferOutData( StateAp *destState, StateAp *srcState ); + + /* + * Allocation. + */ + + /* New up a state and add it to the graph. */ + StateAp *addState(); + + /* + * Building basic machines + */ + + void concatFsm( Key c ); + void concatFsm( Key *str, int len ); + void concatFsmCI( Key *str, int len ); + void orFsm( Key *set, int len ); + void rangeFsm( Key low, Key high ); + void rangeStarFsm( Key low, Key high ); + void emptyFsm( ); + void lambdaFsm( ); + + /* + * Fsm operators. + */ + + void starOp( ); + void repeatOp( int times ); + void optionalRepeatOp( int times ); + void concatOp( FsmAp *other ); + void unionOp( FsmAp *other ); + void intersectOp( FsmAp *other ); + void subtractOp( FsmAp *other ); + void epsilonOp(); + void joinOp( int startId, int finalId, FsmAp **others, int numOthers ); + void globOp( FsmAp **others, int numOthers ); + void deterministicEntry(); + + /* + * Operator workers + */ + + /* Determine if there are any entry points into a start state other than + * the start state. */ + bool isStartStateIsolated(); + + /* Make a new start state that has no entry points. Will not change the + * identity of the fsm. */ + void isolateStartState(); + + /* Workers for resolving epsilon transitions. */ + bool inEptVect( EptVect *eptVect, StateAp *targ ); + void epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving ); + void resolveEpsilonTrans( MergeData &md ); + + /* Workers for concatenation and union. 
*/ + void doConcat( FsmAp *other, StateSet *fromStates, bool optional ); + void doOr( FsmAp *other ); + + /* + * Final states + */ + + /* Unset any final states that are no longer to be final + * due to final bits. */ + void unsetIncompleteFinals(); + void unsetKilledFinals(); + + /* Bring in other's entry points. Assumes others states are going to be + * copied into this machine. */ + void copyInEntryPoints( FsmAp *other ); + + /* Set State numbers starting at 0. */ + void setStateNumbers(); + + /* Unset all final states. */ + void unsetAllFinStates(); + + /* Set the bits of final states and clear the bits of non final states. */ + void setFinBits( int finStateBits ); + + /* + * Self-consistency checks. + */ + + /* Run a sanity check on the machine. */ + void verifyIntegrity(); + + /* Verify that there are no unreachable states, or dead end states. */ + void verifyReachability(); + void verifyNoDeadEndStates(); + + /* + * Path pruning + */ + + /* Mark all states reachable from state. */ + void markReachableFromHereReverse( StateAp *state ); + + /* Mark all states reachable from state. */ + void markReachableFromHere( StateAp *state ); + void markReachableFromHereStopFinal( StateAp *state ); + + /* Removes states that cannot be reached by any path in the fsm and are + * thus wasted silicon. */ + void removeDeadEndStates(); + + /* Removes states that cannot be reached by any path in the fsm and are + * thus wasted silicon. */ + void removeUnreachableStates(); + + /* Remove error actions from states on which the error transition will + * never be taken. */ + bool outListCovers( StateAp *state ); + bool anyErrorRange( StateAp *state ); + + /* Remove states that are on the misfit list. */ + void removeMisfits(); + + /* + * FSM Minimization + */ + + /* Minimization by partitioning. */ + void minimizePartition1(); + void minimizePartition2(); + + /* Minimize the final state Machine. The result is the minimal fsm. Slow + * but stable, correct minimization. 
Uses n^2 space (lookout) and average + * n^2 time. Worst case n^3 time, but a that is a very rare case. */ + void minimizeStable(); + + /* Minimize the final state machine. Does not find the minimal fsm, but a + * pretty good approximation. Does not use any extra space. Average n^2 + * time. Worst case n^3 time, but a that is a very rare case. */ + void minimizeApproximate(); + + /* This is the worker for the minimize approximate solution. It merges + * states that have identical out transitions. */ + bool minimizeRound( ); + + /* Given an intial partioning of states, split partitions that have out trans + * to differing partitions. */ + int partitionRound( StateAp **statePtrs, MinPartition *parts, int numParts ); + + /* Split partitions that have a transition to a previously split partition, until + * there are no more partitions to split. */ + int splitCandidates( StateAp **statePtrs, MinPartition *parts, int numParts ); + + /* Fuse together states in the same partition. */ + void fusePartitions( MinPartition *parts, int numParts ); + + /* Mark pairs where out final stateness differs, out trans data differs, + * trans pairs go to a marked pair or trans data differs. Should get + * alot of pairs. */ + void initialMarkRound( MarkIndex &markIndex ); + + /* One marking round on all state pairs. Considers if trans pairs go + * to a marked state only. Returns whether or not a pair was marked. */ + bool markRound( MarkIndex &markIndex ); + + /* Move the in trans into src into dest. */ + void inTransMove(StateAp *dest, StateAp *src); + + /* Make state src and dest the same state. */ + void fuseEquivStates(StateAp *dest, StateAp *src); + + /* Find any states that didn't get marked by the marking algorithm and + * merge them into the primary states of their equivalence class. */ + void fuseUnmarkedPairs( MarkIndex &markIndex ); + + /* Merge neighboring transitions go to the same state and have the same + * transitions data. 
*/ + void compressTransitions(); +}; + + +#endif /* _FSMGRAPH_H */ diff --git a/ragel/fsmmin.cpp b/ragel/fsmmin.cpp new file mode 100644 index 0000000..c57de6f --- /dev/null +++ b/ragel/fsmmin.cpp @@ -0,0 +1,732 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "fsmgraph.h" +#include "mergesort.h" + +int FsmAp::partitionRound( StateAp **statePtrs, MinPartition *parts, int numParts ) +{ + /* Need a mergesort object and a single partition compare. */ + MergeSort<StateAp*, PartitionCompare> mergeSort; + PartitionCompare partCompare; + + /* For each partition. */ + for ( int p = 0; p < numParts; p++ ) { + /* Fill the pointer array with the states in the partition. */ + StateList::Iter state = parts[p].list; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the partitioning compare. */ + int numStates = parts[p].list.length(); + mergeSort.sort( statePtrs, numStates ); + + /* Assign the states into partitions based on the results of the sort. */ + int destPart = p, firstNewPart = numParts; + for ( int s = 1; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. 
*/ + if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* The new partition is the next avail spot. */ + destPart = numParts; + numParts += 1; + } + + /* If the state is not staying in the first partition, then + * transfer it to its destination partition. */ + if ( destPart != p ) { + StateAp *state = parts[p].list.detach( statePtrs[s] ); + parts[destPart].list.append( state ); + } + } + + /* Fix the partition pointer for all the states that got moved to a new + * partition. This must be done after the states are transfered so the + * result of the sort is not altered. */ + for ( int newPart = firstNewPart; newPart < numParts; newPart++ ) { + StateList::Iter state = parts[newPart].list; + for ( ; state.lte(); state++ ) + state->alg.partition = &parts[newPart]; + } + } + + return numParts; +} + +/** + * \brief Minimize by partitioning version 1. + * + * Repeatedly tries to split partitions until all partitions are unsplittable. + * Produces the most minimal FSM possible. + */ +void FsmAp::minimizePartition1() +{ + /* Need one mergesort object and partition compares. */ + MergeSort<StateAp*, InitPartitionCompare> mergeSort; + InitPartitionCompare initPartCompare; + + /* Nothing to do if there are no states. */ + if ( stateList.length() == 0 ) + return; + + /* + * First thing is to partition the states by final state status and + * transition functions. This gives us an initial partitioning to work + * with. + */ + + /* Make a array of pointers to states. */ + int numStates = stateList.length(); + StateAp** statePtrs = new StateAp*[numStates]; + + /* Fill up an array of pointers to the states for easy sorting. */ + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the array of states. */ + mergeSort.sort( statePtrs, numStates ); + + /* An array of lists of states is used to partition the states. 
*/ + MinPartition *parts = new MinPartition[numStates]; + + /* Assign the states into partitions. */ + int destPart = 0; + for ( int s = 0; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* Move to the next partition. */ + destPart += 1; + } + + /* Put the state into its partition. */ + statePtrs[s]->alg.partition = &parts[destPart]; + parts[destPart].list.append( statePtrs[s] ); + } + + /* We just moved all the states from the main list into partitions without + * taking them off the main list. So clean up the main list now. */ + stateList.abandon(); + + /* Split partitions. */ + int numParts = destPart + 1; + while ( true ) { + /* Test all partitions for splitting. */ + int newNum = partitionRound( statePtrs, parts, numParts ); + + /* When no partitions can be split, stop. */ + if ( newNum == numParts ) + break; + + numParts = newNum; + } + + /* Fuse states in the same partition. The states will end up back on the + * main list. */ + fusePartitions( parts, numParts ); + + /* Cleanup. */ + delete[] statePtrs; + delete[] parts; +} + +/* Split partitions that need splittting, decide which partitions might need + * to be split as a result, continue until there are no more that might need + * to be split. */ +int FsmAp::splitCandidates( StateAp **statePtrs, MinPartition *parts, int numParts ) +{ + /* Need a mergesort and a partition compare. */ + MergeSort<StateAp*, PartitionCompare> mergeSort; + PartitionCompare partCompare; + + /* The lists of unsplitable (partList) and splitable partitions. + * Only partitions in the splitable list are check for needing splitting. */ + PartitionList partList, splittable; + + /* Initially, all partitions are born from a split (the initial + * partitioning) and can cause other partitions to be split. 
So any + * partition with a state with a transition out to another partition is a + * candidate for splitting. This will make every partition except possibly + * partitions of final states split candidates. */ + for ( int p = 0; p < numParts; p++ ) { + /* Assume not active. */ + parts[p].active = false; + + /* Look for a trans out of any state in the partition. */ + for ( StateList::Iter state = parts[p].list; state.lte(); state++ ) { + /* If there is at least one transition out to another state then + * the partition becomes splittable. */ + if ( state->outList.length() > 0 ) { + parts[p].active = true; + break; + } + } + + /* If it was found active then it goes on the splittable list. */ + if ( parts[p].active ) + splittable.append( &parts[p] ); + else + partList.append( &parts[p] ); + } + + /* While there are partitions that are splittable, pull one off and try + * to split it. If it splits, determine which partitions may now be split + * as a result of the newly split partition. */ + while ( splittable.length() > 0 ) { + MinPartition *partition = splittable.detachFirst(); + + /* Fill the pointer array with the states in the partition. */ + StateList::Iter state = partition->list; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the partitioning compare. */ + int numStates = partition->list.length(); + mergeSort.sort( statePtrs, numStates ); + + /* Assign the states into partitions based on the results of the sort. */ + MinPartition *destPart = partition; + int firstNewPart = numParts; + for ( int s = 1; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* The new partition is the next avail spot. */ + destPart = &parts[numParts]; + numParts += 1; + } + + /* If the state is not staying in the first partition, then + * transfer it to its destination partition. 
*/ + if ( destPart != partition ) { + StateAp *state = partition->list.detach( statePtrs[s] ); + destPart->list.append( state ); + } + } + + /* Fix the partition pointer for all the states that got moved to a new + * partition. This must be done after the states are transfered so the + * result of the sort is not altered. */ + int newPart; + for ( newPart = firstNewPart; newPart < numParts; newPart++ ) { + StateList::Iter state = parts[newPart].list; + for ( ; state.lte(); state++ ) + state->alg.partition = &parts[newPart]; + } + + /* Put the partition we just split and any new partitions that came out + * of the split onto the inactive list. */ + partition->active = false; + partList.append( partition ); + for ( newPart = firstNewPart; newPart < numParts; newPart++ ) { + parts[newPart].active = false; + partList.append( &parts[newPart] ); + } + + if ( destPart == partition ) + continue; + + /* Now determine which partitions are splittable as a result of + * splitting partition by walking the in lists of the states in + * partitions that got split. Partition is the faked first item in the + * loop. */ + MinPartition *causalPart = partition; + newPart = firstNewPart - 1; + while ( newPart < numParts ) { + /* Loop all states in the causal partition. */ + StateList::Iter state = causalPart->list; + for ( ; state.lte(); state++ ) { + /* Walk all transition into the state and put the partition + * that the from state is in onto the splittable list. */ + for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) { + MinPartition *fromPart = trans->fromState->alg.partition; + if ( ! fromPart->active ) { + fromPart->active = true; + partList.detach( fromPart ); + splittable.append( fromPart ); + } + } + } + + newPart += 1; + causalPart = &parts[newPart]; + } + } + return numParts; +} + + +/** + * \brief Minimize by partitioning version 2 (best alg). 
+ * + * Repeatedly tries to split partitions that may splittable until there are no + * more partitions that might possibly need splitting. Runs faster than + * version 1. Produces the most minimal fsm possible. + */ +void FsmAp::minimizePartition2() +{ + /* Need a mergesort and an initial partition compare. */ + MergeSort<StateAp*, InitPartitionCompare> mergeSort; + InitPartitionCompare initPartCompare; + + /* Nothing to do if there are no states. */ + if ( stateList.length() == 0 ) + return; + + /* + * First thing is to partition the states by final state status and + * transition functions. This gives us an initial partitioning to work + * with. + */ + + /* Make a array of pointers to states. */ + int numStates = stateList.length(); + StateAp** statePtrs = new StateAp*[numStates]; + + /* Fill up an array of pointers to the states for easy sorting. */ + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the array of states. */ + mergeSort.sort( statePtrs, numStates ); + + /* An array of lists of states is used to partition the states. */ + MinPartition *parts = new MinPartition[numStates]; + + /* Assign the states into partitions. */ + int destPart = 0; + for ( int s = 0; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* Move to the next partition. */ + destPart += 1; + } + + /* Put the state into its partition. */ + statePtrs[s]->alg.partition = &parts[destPart]; + parts[destPart].list.append( statePtrs[s] ); + } + + /* We just moved all the states from the main list into partitions without + * taking them off the main list. So clean up the main list now. */ + stateList.abandon(); + + /* Split partitions. */ + int numParts = splitCandidates( statePtrs, parts, destPart+1 ); + + /* Fuse states in the same partition. 
The states will end up back on the + * main list. */ + fusePartitions( parts, numParts ); + + /* Cleanup. */ + delete[] statePtrs; + delete[] parts; +} + +void FsmAp::initialMarkRound( MarkIndex &markIndex ) +{ + /* P and q for walking pairs. */ + StateAp *p = stateList.head, *q; + + /* Need an initial partition compare. */ + InitPartitionCompare initPartCompare; + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. This + * gives us all unordered pairs of states (p, q) where p != q. */ + while ( p != 0 ) { + q = stateList.head; + while ( q != p ) { + /* If the states differ on final state status, out transitions or + * any transition data then they should be separated on the initial + * round. */ + if ( initPartCompare.compare( p, q ) != 0 ) + markIndex.markPair( p->alg.stateNum, q->alg.stateNum ); + + q = q->next; + } + p = p->next; + } +} + +bool FsmAp::markRound( MarkIndex &markIndex ) +{ + /* P an q for walking pairs. Take note if any pair gets marked. */ + StateAp *p = stateList.head, *q; + bool pairWasMarked = false; + + /* Need a mark comparison. */ + MarkCompare markCompare; + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. This + * gives us all unordered pairs of states (p, q) where p != q. */ + while ( p != 0 ) { + q = stateList.head; + while ( q != p ) { + /* Should we mark the pair? */ + if ( !markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) { + if ( markCompare.shouldMark( markIndex, p, q ) ) { + markIndex.markPair( p->alg.stateNum, q->alg.stateNum ); + pairWasMarked = true; + } + } + q = q->next; + } + p = p->next; + } + + return pairWasMarked; +} + + +/** + * \brief Minimize by pair marking. + * + * Decides if each pair of states is distinct or not. Uses O(n^2) memory and + * should only be used on small graphs. Produces the most minmimal FSM + * possible. 
+ */ +void FsmAp::minimizeStable() +{ + /* Set the state numbers. */ + setStateNumbers(); + + /* This keeps track of which pairs have been marked. */ + MarkIndex markIndex( stateList.length() ); + + /* Mark pairs where final stateness, out trans, or trans data differ. */ + initialMarkRound( markIndex ); + + /* While the last round of marking succeeded in marking a state + * continue to do another round. */ + int modified = markRound( markIndex ); + while (modified) + modified = markRound( markIndex ); + + /* Merge pairs that are unmarked. */ + fuseUnmarkedPairs( markIndex ); +} + +bool FsmAp::minimizeRound() +{ + /* Nothing to do if there are no states. */ + if ( stateList.length() == 0 ) + return false; + + /* Need a mergesort on approx compare and an approx compare. */ + MergeSort<StateAp*, ApproxCompare> mergeSort; + ApproxCompare approxCompare; + + /* Fill up an array of pointers to the states. */ + StateAp **statePtrs = new StateAp*[stateList.length()]; + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + bool modified = false; + + /* Sort The list. */ + mergeSort.sort( statePtrs, stateList.length() ); + + /* Walk the list looking for duplicates next to each other, + * merge in any duplicates. */ + StateAp **pLast = statePtrs; + StateAp **pState = statePtrs + 1; + for ( int i = 1; i < stateList.length(); i++, pState++ ) { + if ( approxCompare.compare( *pLast, *pState ) == 0 ) { + /* Last and pState are the same, so fuse together. Move forward + * with pState but not with pLast. If any more are identical, we + * must */ + fuseEquivStates( *pLast, *pState ); + modified = true; + } + else { + /* Last and this are different, do not set to merge them. Move + * pLast to the current (it may be way behind from merging many + * states) and pState forward one to consider the next pair. */ + pLast = pState; + } + } + delete[] statePtrs; + return modified; +} + +/** + * \brief Minmimize by an approximation. 
+ * + * Repeatedly tries to find states with transitions out to the same set of + * states on the same set of keys until no more identical states can be found. + * Does not produce the most minimial FSM possible. + */ +void FsmAp::minimizeApproximate() +{ + /* While the last minimization round succeeded in compacting states, + * continue to try to compact states. */ + while ( true ) { + bool modified = minimizeRound(); + if ( ! modified ) + break; + } +} + + +/* Remove states that have no path to them from the start state. Recursively + * traverses the graph marking states that have paths into them. Then removes + * all states that did not get marked. */ +void FsmAp::removeUnreachableStates() +{ + /* Misfit accounting should be off and there should be no states on the + * misfit list. */ + assert( !misfitAccounting && misfitList.length() == 0 ); + + /* Mark all the states that can be reached + * through the existing set of entry points. */ + markReachableFromHere( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + markReachableFromHere( en->value ); + + /* Delete all states that are not marked + * and unmark the ones that are marked. */ + StateAp *state = stateList.head; + while ( state ) { + StateAp *next = state->next; + + if ( state->stateBits & SB_ISMARKED ) + state->stateBits &= ~ SB_ISMARKED; + else { + detachState( state ); + stateList.detach( state ); + delete state; + } + + state = next; + } +} + +bool FsmAp::outListCovers( StateAp *state ) +{ + /* Must be at least one range to cover. */ + if ( state->outList.length() == 0 ) + return false; + + /* The first must start at the lower bound. */ + TransList::Iter trans = state->outList.first(); + if ( keyOps->minKey < trans->lowKey ) + return false; + + /* Loop starts at second el. */ + trans.increment(); + + /* Loop checks lower against prev upper. */ + for ( ; trans.lte(); trans++ ) { + /* Lower end of the trans must be one greater than the + * previous' high end. 
*/ + Key lowKey = trans->lowKey; + lowKey.decrement(); + if ( trans->prev->highKey < lowKey ) + return false; + } + + /* Require that the last range extends to the upper bound. */ + trans = state->outList.last(); + if ( trans->highKey < keyOps->maxKey ) + return false; + + return true; +} + +/* Remove states that that do not lead to a final states. Works recursivly traversing + * the graph in reverse (starting from all final states) and marking seen states. Then + * removes states that did not get marked. */ +void FsmAp::removeDeadEndStates() +{ + /* Misfit accounting should be off and there should be no states on the + * misfit list. */ + assert( !misfitAccounting && misfitList.length() == 0 ); + + /* Mark all states that have paths to the final states. */ + StateAp **st = finStateSet.data; + int nst = finStateSet.length(); + for ( int i = 0; i < nst; i++, st++ ) + markReachableFromHereReverse( *st ); + + /* Start state gets honorary marking. If the machine accepts nothing we + * still want the start state to hang around. This must be done after the + * recursive call on all the final states so that it does not cause the + * start state in transitions to be skipped when the start state is + * visited by the traversal. */ + startState->stateBits |= SB_ISMARKED; + + /* Delete all states that are not marked + * and unmark the ones that are marked. */ + StateAp *state = stateList.head; + while ( state != 0 ) { + StateAp *next = state->next; + + if ( state->stateBits & SB_ISMARKED ) + state->stateBits &= ~ SB_ISMARKED; + else { + detachState( state ); + stateList.detach( state ); + delete state; + } + + state = next; + } +} + +/* Remove states on the misfit list. To work properly misfit accounting should + * be on when this is called. The detaching of a state will likely cause + * another misfit to be collected and it can then be removed. */ +void FsmAp::removeMisfits() +{ + while ( misfitList.length() > 0 ) { + /* Get the first state. 
*/ + StateAp *state = misfitList.head; + + /* Detach and delete. */ + detachState( state ); + + /* The state was previously on the misfit list and detaching can only + * remove in transitions so the state must still be on the misfit + * list. */ + misfitList.detach( state ); + delete state; + } +} + +/* Fuse src into dest because they have been deemed equivalent states. + * Involves moving transitions into src to go into dest and invoking + * callbacks. Src is deleted detached from the graph and deleted. */ +void FsmAp::fuseEquivStates( StateAp *dest, StateAp *src ) +{ + /* This would get ugly. */ + assert( dest != src ); + + /* Cur is a duplicate. We can merge it with trail. */ + inTransMove( dest, src ); + + detachState( src ); + stateList.detach( src ); + delete src; +} + +void FsmAp::fuseUnmarkedPairs( MarkIndex &markIndex ) +{ + StateAp *p = stateList.head, *nextP, *q; + + /* Definition: The primary state of an equivalence class is the first state + * encounterd that belongs to the equivalence class. All equivalence + * classes have primary state including equivalence classes with one state + * in it. */ + + /* For each unmarked pair merge p into q and delete p. q is always the + * primary state of it's equivalence class. We wouldn't have landed on it + * here if it were not, because it would have been deleted. + * + * Proof that q is the primaray state of it's equivalence class: Assume q + * is not the primary state of it's equivalence class, then it would be + * merged into some state that came before it and thus p would be + * equivalent to that state. But q is the first state that p is equivalent + * to so we have a contradiction. */ + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. This + * gives us all unordered pairs of states (p, q) where p != q. */ + while ( p != 0 ) { + nextP = p->next; + + q = stateList.head; + while ( q != p ) { + /* If one of p or q is a final state then mark. 
*/ + if ( ! markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) { + fuseEquivStates( q, p ); + break; + } + q = q->next; + } + p = nextP; + } +} + +void FsmAp::fusePartitions( MinPartition *parts, int numParts ) +{ + /* For each partition, fuse state 2, 3, ... into state 1. */ + for ( int p = 0; p < numParts; p++ ) { + /* Assume that there will always be at least one state. */ + StateAp *first = parts[p].list.head, *toFuse = first->next; + + /* Put the first state back onto the main state list. Don't bother + * removing it from the partition list first. */ + stateList.append( first ); + + /* Fuse the rest of the state into the first. */ + while ( toFuse != 0 ) { + /* Save the next. We will trash it before it is needed. */ + StateAp *next = toFuse->next; + + /* Put the state to be fused in to the first back onto the main + * list before it is fuse. the graph. The state needs to be on + * the main list for the detach from the graph to work. Don't + * bother removing the state from the partition list first. We + * need not maintain it. */ + stateList.append( toFuse ); + + /* Now fuse to the first. */ + fuseEquivStates( first, toFuse ); + + /* Go to the next that we saved before trashing the next pointer. */ + toFuse = next; + } + + /* We transfered the states from the partition list into the main list without + * removing the states from the partition list first. Clean it up. */ + parts[p].list.abandon(); + } +} + + +/* Merge neighboring transitions go to the same state and have the same + * transitions data. 
*/ +void FsmAp::compressTransitions() +{ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->outList.length() > 1 ) { + for ( TransList::Iter trans = st->outList, next = trans.next(); next.lte(); ) { + Key nextLow = next->lowKey; + nextLow.decrement(); + if ( trans->highKey == nextLow && trans->toState == next->toState && + CmpActionTable::compare( trans->actionTable, next->actionTable ) == 0 ) + { + trans->highKey = next->highKey; + st->outList.detach( next ); + detachTrans( next->fromState, next->toState, next ); + delete next; + next = trans.next(); + } + else { + trans.increment(); + next.increment(); + } + } + } + } +} diff --git a/ragel/fsmstate.cpp b/ragel/fsmstate.cpp new file mode 100644 index 0000000..4322c10 --- /dev/null +++ b/ragel/fsmstate.cpp @@ -0,0 +1,463 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <string.h> +#include <assert.h> +#include "fsmgraph.h" + +#include <iostream> +using namespace std; + +/* Construct a mark index for a specified number of states. Must new up + * an array that is states^2 in size. */ +MarkIndex::MarkIndex( int states ) : numStates(states) +{ + /* Total pairs is states^2. 
Actually only use half of these, but we allocate + * them all to make indexing into the array easier. */ + int total = states * states; + + /* New up chars so that individual DListEl constructors are + * not called. Zero out the mem manually. */ + array = new bool[total]; + memset( array, 0, sizeof(bool) * total ); +} + +/* Free the array used to store state pairs. */ +MarkIndex::~MarkIndex() +{ + delete[] array; +} + +/* Mark a pair of states. States are specified by their number. The + * marked states are moved from the unmarked list to the marked list. */ +void MarkIndex::markPair(int state1, int state2) +{ + int pos = ( state1 >= state2 ) ? + ( state1 * numStates ) + state2 : + ( state2 * numStates ) + state1; + + array[pos] = true; +} + +/* Returns true if the pair of states are marked. Returns false otherwise. + * Ordering of states given does not matter. */ +bool MarkIndex::isPairMarked(int state1, int state2) +{ + int pos = ( state1 >= state2 ) ? + ( state1 * numStates ) + state2 : + ( state2 * numStates ) + state1; + + return array[pos]; +} + +/* Create a new fsm state. State has not out transitions or in transitions, not + * out out transition data and not number. */ +StateAp::StateAp() +: + /* No out or in transitions. */ + outList(), + inList(), + + /* No entry points, or epsilon trans. */ + entryIds(), + epsilonTrans(), + + /* Conditions. */ + stateCondList(), + + /* No transitions in from other states. */ + foreignInTrans(0), + + /* Only used during merging. Normally null. */ + stateDictEl(0), + eptVect(0), + + /* No state identification bits. */ + stateBits(0), + + /* No Priority data. */ + outPriorTable(), + + /* No Action data. */ + toStateActionTable(), + fromStateActionTable(), + outActionTable(), + outCondSet(), + errActionTable(), + eofActionTable() +{ +} + +/* Copy everything except actual the transitions. That is left up to the + * FsmAp copy constructor. */ +StateAp::StateAp(const StateAp &other) +: + /* All lists are cleared. 
They will be filled in when the + * individual transitions are duplicated and attached. */ + outList(), + inList(), + + /* Duplicate the entry id set and epsilon transitions. These + * are sets of integers and as such need no fixing. */ + entryIds(other.entryIds), + epsilonTrans(other.epsilonTrans), + + /* Copy in the elements of the conditions. */ + stateCondList( other.stateCondList ), + + /* No transitions in from other states. */ + foreignInTrans(0), + + /* This is only used during merging. Normally null. */ + stateDictEl(0), + eptVect(0), + + /* Fsm state data. */ + stateBits(other.stateBits), + + /* Copy in priority data. */ + outPriorTable(other.outPriorTable), + + /* Copy in action data. */ + toStateActionTable(other.toStateActionTable), + fromStateActionTable(other.fromStateActionTable), + outActionTable(other.outActionTable), + outCondSet(other.outCondSet), + errActionTable(other.errActionTable), + eofActionTable(other.eofActionTable) +{ + /* Duplicate all the transitions. */ + for ( TransList::Iter trans = other.outList; trans.lte(); trans++ ) { + /* Dupicate and store the orginal target in the transition. This will + * be corrected once all the states have been created. */ + TransAp *newTrans = new TransAp(*trans); + newTrans->toState = trans->toState; + outList.append( newTrans ); + } +} + +/* If there is a state dict element, then delete it. Everything else is left + * up to the FsmGraph destructor. */ +StateAp::~StateAp() +{ + if ( stateDictEl != 0 ) + delete stateDictEl; +} + +/* Compare two states using pointers to the states. With the approximate + * compare the idea is that if the compare finds them the same, they can + * immediately be merged. */ +int ApproxCompare::compare( const StateAp *state1 , const StateAp *state2 ) +{ + int compareRes; + + /* Test final state status. 
*/ + if ( (state1->stateBits & SB_ISFINAL) && !(state2->stateBits & SB_ISFINAL) ) + return -1; + else if ( !(state1->stateBits & SB_ISFINAL) && (state2->stateBits & SB_ISFINAL) ) + return 1; + + /* Test epsilon transition sets. */ + compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans, + state2->epsilonTrans ); + if ( compareRes != 0 ) + return compareRes; + + /* Compare the out transitions. */ + compareRes = FsmAp::compareStateData( state1, state2 ); + if ( compareRes != 0 ) + return compareRes; + + /* Use a pair iterator to get the transition pairs. */ + PairIter<TransAp> outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + compareRes = FsmAp::compareFullPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeInS2: + compareRes = FsmAp::compareFullPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeOverlap: + compareRes = FsmAp::compareFullPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + /* Got through the entire state comparison, deem them equal. */ + return 0; +} + +/* Compare class for the sort that does the intial partition of compaction. */ +int InitPartitionCompare::compare( const StateAp *state1 , const StateAp *state2 ) +{ + int compareRes; + + /* Test final state status. */ + if ( (state1->stateBits & SB_ISFINAL) && !(state2->stateBits & SB_ISFINAL) ) + return -1; + else if ( !(state1->stateBits & SB_ISFINAL) && (state2->stateBits & SB_ISFINAL) ) + return 1; + + /* Test epsilon transition sets. */ + compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans, + state2->epsilonTrans ); + if ( compareRes != 0 ) + return compareRes; + + /* Compare the out transitions. 
*/ + compareRes = FsmAp::compareStateData( state1, state2 ); + if ( compareRes != 0 ) + return compareRes; + + /* Use a pair iterator to test the condition pairs. */ + PairIter<StateCond> condPair( state1->stateCondList.head, state2->stateCondList.head ); + for ( ; !condPair.end(); condPair++ ) { + switch ( condPair.userState ) { + case RangeInS1: + return 1; + case RangeInS2: + return -1; + + case RangeOverlap: { + CondSpace *condSpace1 = condPair.s1Tel.trans->condSpace; + CondSpace *condSpace2 = condPair.s2Tel.trans->condSpace; + if ( condSpace1 < condSpace2 ) + return -1; + else if ( condSpace1 > condSpace2 ) + return 1; + break; + } + case BreakS1: + case BreakS2: + break; + } + } + + /* Use a pair iterator to test the transition pairs. */ + PairIter<TransAp> outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + compareRes = FsmAp::compareDataPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeInS2: + compareRes = FsmAp::compareDataPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeOverlap: + compareRes = FsmAp::compareDataPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + return 0; +} + +/* Compare class for the sort that does the partitioning. */ +int PartitionCompare::compare( const StateAp *state1, const StateAp *state2 ) +{ + int compareRes; + + /* Use a pair iterator to get the transition pairs. 
*/ + PairIter<TransAp> outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + compareRes = FsmAp::comparePartPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeInS2: + compareRes = FsmAp::comparePartPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeOverlap: + compareRes = FsmAp::comparePartPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + return 0; +} + +/* Compare class for the sort that does the partitioning. */ +bool MarkCompare::shouldMark( MarkIndex &markIndex, const StateAp *state1, + const StateAp *state2 ) +{ + /* Use a pair iterator to get the transition pairs. */ + PairIter<TransAp> outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + if ( FsmAp::shouldMarkPtr( markIndex, outPair.s1Tel.trans, 0 ) ) + return true; + break; + + case RangeInS2: + if ( FsmAp::shouldMarkPtr( markIndex, 0, outPair.s2Tel.trans ) ) + return true; + break; + + case RangeOverlap: + if ( FsmAp::shouldMarkPtr( markIndex, + outPair.s1Tel.trans, outPair.s2Tel.trans ) ) + return true; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + return false; +} + +/* + * Transition Comparison. + */ + +/* Compare target partitions. Either pointer may be null. */ +int FsmAp::comparePartPtr( TransAp *trans1, TransAp *trans2 ) +{ + if ( trans1 != 0 ) { + /* If trans1 is set then so should trans2. The initial partitioning + * guarantees this for us. */ + if ( trans1->toState == 0 && trans2->toState != 0 ) + return -1; + else if ( trans1->toState != 0 && trans2->toState == 0 ) + return 1; + else if ( trans1->toState != 0 ) { + /* Both of targets are set. 
*/ + return CmpOrd< MinPartition* >::compare( + trans1->toState->alg.partition, trans2->toState->alg.partition ); + } + } + return 0; +} + + +/* Compares two transition pointers according to priority and functions. + * Either pointer may be null. Does not consider to state or from state. */ +int FsmAp::compareDataPtr( TransAp *trans1, TransAp *trans2 ) +{ + if ( trans1 == 0 && trans2 != 0 ) + return -1; + else if ( trans1 != 0 && trans2 == 0 ) + return 1; + else if ( trans1 != 0 ) { + /* Both of the transition pointers are set. */ + int compareRes = compareTransData( trans1, trans2 ); + if ( compareRes != 0 ) + return compareRes; + } + return 0; +} + +/* Compares two transitions according to target state, priority and functions. + * Does not consider from state. Either of the pointers may be null. */ +int FsmAp::compareFullPtr( TransAp *trans1, TransAp *trans2 ) +{ + if ( (trans1 != 0) ^ (trans2 != 0) ) { + /* Exactly one of the transitions is set. */ + if ( trans1 != 0 ) + return -1; + else + return 1; + } + else if ( trans1 != 0 ) { + /* Both of the transition pointers are set. Test target state, + * priority and funcs. */ + if ( trans1->toState < trans2->toState ) + return -1; + else if ( trans1->toState > trans2->toState ) + return 1; + else if ( trans1->toState != 0 ) { + /* Test transition data. */ + int compareRes = compareTransData( trans1, trans2 ); + if ( compareRes != 0 ) + return compareRes; + } + } + return 0; +} + + +bool FsmAp::shouldMarkPtr( MarkIndex &markIndex, TransAp *trans1, + TransAp *trans2 ) +{ + if ( (trans1 != 0) ^ (trans2 != 0) ) { + /* Exactly one of the transitions is set. The initial mark round + * should rule out this case. */ + assert( false ); + } + else if ( trans1 != 0 ) { + /* Both of the transitions are set. If the target pair is marked, then + * the pair we are considering gets marked. 
*/ + return markIndex.isPairMarked( trans1->toState->alg.stateNum, + trans2->toState->alg.stateNum ); + } + + /* Neither of the transitiosn are set. */ + return false; +} + + diff --git a/ragel/main.cpp b/ragel/main.cpp new file mode 100644 index 0000000..41d6e6a --- /dev/null +++ b/ragel/main.cpp @@ -0,0 +1,339 @@ +/* + * Copyright 2001-2005 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <iostream> +#include <fstream> +#include <unistd.h> +#include <sstream> + +/* Parsing. */ +#include "ragel.h" + +/* Parameters and output. */ +#include "pcheck.h" +#include "vector.h" +#include "version.h" + +#include "common.cpp" + +using std::istream; +using std::ostream; +using std::ifstream; +using std::ofstream; +using std::cin; +using std::cout; +using std::cerr; +using std::endl; + +/* Io globals. */ +istream *inStream = 0; +ostream *outStream = 0; +char *outputFileName = 0; + +/* Controls minimization. */ +MinimizeLevel minimizeLevel = MinimizePartition2; +MinimizeOpt minimizeOpt = MinimizeMostOps; + +/* Graphviz dot file generation. 
*/ +char *machineSpec = 0, *machineName = 0; +bool machineSpecFound = false; + +bool printStatistics = false; + +/* Print a summary of the options. */ +void usage() +{ + cout << +"usage: ragel [options] file\n" +"general:\n" +" -h, -H, -?, --help Print this usage and exit\n" +" -v, --version Print version information and exit\n" +" -o <file> Write output to <file>\n" +" -s Print some statistics on stderr\n" +"fsm minimization:\n" +" -n Do not perform minimization\n" +" -m Minimize at the end of the compilation\n" +" -l Minimize after most operations (default)\n" +" -e Minimize after every operation\n" +"machine selection:\n" +" -S <spec> FSM specification to output for -V\n" +" -M <machine> Machine definition/instantiation to output for -V\n" +"host language:\n" +" -C The host language is C, C++, Obj-C or Obj-C++ (default)\n" +" -D The host language is D\n" +" -J The host language is Java\n" + ; +} + +/* Print version information. */ +void version() +{ + cout << "Ragel State Machine Compiler version " VERSION << " " PUBDATE << endl << + "Copyright (c) 2001-2006 by Adrian Thurston" << endl; +} + +/* Global parse data pointer. */ +//extern InputData *id; + +/* Total error count. */ +int gblErrorCount = 0; + +/* Print the opening to a program error, then return the error stream. */ +ostream &error() +{ + /* Keep the error count. */ +// if ( id != 0 && id->pd != 0 ) +// id->pd->errorCount += 1; + gblErrorCount += 1; + + cerr << PROGNAME ": "; + return cerr; +} + +/* Print the opening to a warning, then return the error ostream. */ +ostream &warning( ) +{ +// cerr << id->fileName << ": warning: "; + return cerr; +} + +/* Print the opening to a warning in the input, then return the error ostream. 
*/ +ostream &warning( const InputLoc &loc ) +{ +// cerr << id->fileName << ":" << loc.line << ":" << +// loc.col << ": warning: "; + return cerr; +} + +void escapeLineDirectivePath( std::ostream &out, char *path ) +{ + for ( char *pc = path; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + out << "\\\\"; + else + out << *pc; + } +} + +/* Main, process args and call yyparse to start scanning input. */ +int main(int argc, char **argv) +{ + ParamCheck pc("o:nmleabjkS:M:CDJvHh?-:s", argc, argv); + char *inputFileName = 0; + + while ( pc.check() ) { + switch ( pc.state ) { + case ParamCheck::match: + switch ( pc.parameter ) { + /* Output. */ + case 'o': + if ( *pc.parameterArg == 0 ) + error() << "a zero length output file name was given" << endl; + else if ( outputFileName != 0 ) + error() << "more than one output file name was given" << endl; + else { + /* Ok, remember the output file name. */ + outputFileName = pc.parameterArg; + } + break; + + /* Minimization, mostly hidden options. */ + case 'n': + minimizeOpt = MinimizeNone; + break; + case 'm': + minimizeOpt = MinimizeEnd; + break; + case 'l': + minimizeOpt = MinimizeMostOps; + break; + case 'e': + minimizeOpt = MinimizeEveryOp; + break; + case 'a': + minimizeLevel = MinimizeApprox; + break; + case 'b': + minimizeLevel = MinimizeStable; + break; + case 'j': + minimizeLevel = MinimizePartition1; + break; + case 'k': + minimizeLevel = MinimizePartition2; + break; + + /* Machine spec. */ + case 'S': + if ( *pc.parameterArg == 0 ) + error() << "please specify an argument to -S" << endl; + else if ( machineSpec != 0 ) + error() << "more than one -S argument was given" << endl; + else { + /* Ok, remember the path to the machine to generate. */ + machineSpec = pc.parameterArg; + } + break; + + /* Machine path. 
*/ + case 'M': + if ( *pc.parameterArg == 0 ) + error() << "please specify an argument to -M" << endl; + else if ( machineName != 0 ) + error() << "more than one -M argument was given" << endl; + else { + /* Ok, remember the machine name to generate. */ + machineName = pc.parameterArg; + } + break; + + /* Host language types. */ + case 'C': + hostLangType = CCode; + hostLang = &hostLangC; + break; + case 'D': + hostLangType = DCode; + hostLang = &hostLangD; + break; + case 'J': + hostLangType = JavaCode; + hostLang = &hostLangJava; + break; + + /* Version and help. */ + case 'v': + version(); + exit(0); + case 'H': case 'h': case '?': + usage(); + exit(0); + case 's': + printStatistics = true; + break; + case '-': + if ( strcasecmp(pc.parameterArg, "help") == 0 ) { + usage(); + exit(0); + } + else if ( strcasecmp(pc.parameterArg, "version") == 0 ) { + version(); + exit(0); + } + else { + error() << "--" << pc.parameterArg << + " is an invalid argument" << endl; + } + } + break; + + case ParamCheck::invalid: + error() << "-" << pc.parameter << " is an invalid argument" << endl; + break; + + case ParamCheck::noparam: + /* It is interpreted as an input file. */ + if ( *pc.curArg == 0 ) + error() << "a zero length input file name was given" << endl; + else if ( inputFileName != 0 ) + error() << "more than one input file name was given" << endl; + else { + /* OK, Remember the filename. */ + inputFileName = pc.curArg; + } + break; + } + } + + /* Bail on above errors. */ + if ( gblErrorCount > 0 ) + exit(1); + + /* Make sure we are not writing to the same file as the input file. */ + if ( inputFileName != 0 && outputFileName != 0 && + strcmp( inputFileName, outputFileName ) == 0 ) + { + error() << "output file \"" << outputFileName << + "\" is the same as the input file" << endl; + } + + /* Open the input file for reading. */ + if ( inputFileName != 0 ) { + /* Open the input file for reading. 
*/ + ifstream *inFile = new ifstream( inputFileName ); + inStream = inFile; + if ( ! inFile->is_open() ) + error() << "could not open " << inputFileName << " for reading" << endl; + } + else { + inputFileName = "<stdin>"; + inStream = &cin; + } + + + /* Bail on above errors. */ + if ( gblErrorCount > 0 ) + exit(1); + + std::ostringstream outputBuffer; + outStream = &outputBuffer; + + if ( machineSpec == 0 && machineName == 0 ) + *outStream << "<host line=\"1\" col=\"1\">"; + + scan( inputFileName, *inStream ); + + /* Finished, final check for errors.. */ + if ( gblErrorCount > 0 ) + return 1; + + /* Now send EOF to all parsers. */ + terminateAllParsers(); + + /* Finished, final check for errors.. */ + if ( gblErrorCount > 0 ) + return 1; + + if ( machineSpec == 0 && machineName == 0 ) + *outStream << "</host>\n"; + + checkMachines(); + + if ( gblErrorCount > 0 ) + return 1; + + ostream *outputFile = 0; + if ( outputFileName != 0 ) + outputFile = new ofstream( outputFileName ); + else + outputFile = &cout; + + /* Write the machines, then the surrounding code. */ + writeMachines( *outputFile, outputBuffer.str(), inputFileName ); + + if ( outputFileName != 0 ) + delete outputFile; + + return 0; +} diff --git a/ragel/parsedata.cpp b/ragel/parsedata.cpp new file mode 100644 index 0000000..3209e28 --- /dev/null +++ b/ragel/parsedata.cpp @@ -0,0 +1,1432 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <iostream> +#include <iomanip> +#include <errno.h> +#include <stdlib.h> +#include <limits.h> + +#include "ragel.h" +#include "rlparse.h" +#include "parsedata.h" +#include "parsetree.h" +#include "mergesort.h" +#include "xmlcodegen.h" + +using namespace std; + +char machineMain[] = "main"; + +void Token::set( char *str, int len ) +{ + length = len; + data = new char[len+1]; + memcpy( data, str, len ); + data[len] = 0; +} + +void Token::append( const Token &other ) +{ + int newLength = length + other.length; + char *newString = new char[newLength+1]; + memcpy( newString, data, length ); + memcpy( newString + length, other.data, other.length ); + newString[newLength] = 0; + data = newString; + length = newLength; +} + +/* Perform minimization after an operation according + * to the command line args. */ +void afterOpMinimize( FsmAp *fsm, bool lastInSeq ) +{ + /* Switch on the prefered minimization algorithm. */ + if ( minimizeOpt == MinimizeEveryOp || minimizeOpt == MinimizeMostOps && lastInSeq ) { + /* First clean up the graph. FsmAp operations may leave these + * lying around. There should be no dead end states. The subtract + * intersection operators are the only places where they may be + * created and those operators clean them up. */ + fsm->removeUnreachableStates(); + + switch ( minimizeLevel ) { + case MinimizeApprox: + fsm->minimizeApproximate(); + break; + case MinimizePartition1: + fsm->minimizePartition1(); + break; + case MinimizePartition2: + fsm->minimizePartition2(); + break; + case MinimizeStable: + fsm->minimizeStable(); + break; + } + } +} + +/* Count the transitions in the fsm by walking the state list. 
*/ +int countTransitions( FsmAp *fsm ) +{ + int numTrans = 0; + StateAp *state = fsm->stateList.head; + while ( state != 0 ) { + numTrans += state->outList.length(); + state = state->next; + } + return numTrans; +} + +Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd ) +{ + /* Reset errno so we can check for overflow or underflow. In the event of + * an error, sets the return val to the upper or lower bound being tested + * against. */ + errno = 0; + unsigned int size = keyOps->alphType->size; + bool unusedBits = size < sizeof(unsigned long); + + unsigned long ul = strtoul( str, 0, 16 ); + + if ( errno == ERANGE || unusedBits && ul >> (size * 8) ) { + error(loc) << "literal " << str << " overflows the alphabet type" << endl; + ul = 1 << (size * 8); + } + + if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) ) + ul |= (0xffffffff >> (size*8 ) ) << (size*8); + + return Key( (long)ul ); +} + +Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd ) +{ + /* Convert the number to a decimal. First reset errno so we can check + * for overflow or underflow. */ + errno = 0; + long long minVal = keyOps->alphType->minVal; + long long maxVal = keyOps->alphType->maxVal; + + long long ll = strtoll( str, 0, 10 ); + + /* Check for underflow. */ + if ( errno == ERANGE && ll < 0 || ll < minVal) { + error(loc) << "literal " << str << " underflows the alphabet type" << endl; + ll = minVal; + } + /* Check for overflow. */ + else if ( errno == ERANGE && ll > 0 || ll > maxVal ) { + error(loc) << "literal " << str << " overflows the alphabet type" << endl; + ll = maxVal; + } + + if ( keyOps->alphType->isSigned ) + return Key( (long)ll ); + else + return Key( (unsigned long)ll ); +} + +/* Make an fsm key in int format (what the fsm graph uses) from an alphabet + * number returned by the parser. Validates that the number doesn't overflow + * the alphabet type. 
*/ +Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd ) +{ + /* Switch on hex/decimal format. */ + if ( str[0] == '0' && str[1] == 'x' ) + return makeFsmKeyHex( str, loc, pd ); + else + return makeFsmKeyDec( str, loc, pd ); +} + +/* Make an fsm int format (what the fsm graph uses) from a single character. + * Performs proper conversion depending on signed/unsigned property of the + * alphabet. */ +Key makeFsmKeyChar( char c, ParseData *pd ) +{ + if ( keyOps->isSigned ) { + /* Copy from a char type. */ + return Key( c ); + } + else { + /* Copy from an unsigned byte type. */ + return Key( (unsigned char)c ); + } +} + +/* Make an fsm key array in int format (what the fsm graph uses) from a string + * of characters. Performs proper conversion depending on signed/unsigned + * property of the alphabet. */ +void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd ) +{ + if ( keyOps->isSigned ) { + /* Copy from a char star type. */ + char *src = data; + for ( int i = 0; i < len; i++ ) + result[i] = Key(src[i]); + } + else { + /* Copy from an unsigned byte ptr type. */ + unsigned char *src = (unsigned char*) data; + for ( int i = 0; i < len; i++ ) + result[i] = Key(src[i]); + } +} + +/* Like makeFsmKeyArray except the result has only unique keys. They ordering + * will be changed. */ +void makeFsmUniqueKeyArray( KeySet &result, char *data, int len, + bool caseInsensitive, ParseData *pd ) +{ + /* Use a transitions list for getting unique keys. */ + if ( keyOps->isSigned ) { + /* Copy from a char star type. */ + char *src = data; + for ( int si = 0; si < len; si++ ) { + Key key( src[si] ); + result.insert( key ); + if ( caseInsensitive ) { + if ( key.isLower() ) + result.insert( key.toUpper() ); + else if ( key.isUpper() ) + result.insert( key.toLower() ); + } + } + } + else { + /* Copy from an unsigned byte ptr type. 
*/ + unsigned char *src = (unsigned char*) data; + for ( int si = 0; si < len; si++ ) { + Key key( src[si] ); + result.insert( key ); + if ( caseInsensitive ) { + if ( key.isLower() ) + result.insert( key.toUpper() ); + else if ( key.isUpper() ) + result.insert( key.toLower() ); + } + } + } +} + +FsmAp *dotFsm( ParseData *pd ) +{ + FsmAp *retFsm = new FsmAp(); + retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey ); + return retFsm; +} + +FsmAp *dotStarFsm( ParseData *pd ) +{ + FsmAp *retFsm = new FsmAp(); + retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey ); + return retFsm; +} + +/* Make a builtin type. Depends on the signed nature of the alphabet type. */ +FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd ) +{ + /* FsmAp created to return. */ + FsmAp *retFsm = 0; + bool isSigned = keyOps->isSigned; + + switch ( builtin ) { + case BT_Any: { + /* All characters. */ + retFsm = dotFsm( pd ); + break; + } + case BT_Ascii: { + /* Ascii characters 0 to 127. */ + retFsm = new FsmAp(); + retFsm->rangeFsm( 0, 127 ); + break; + } + case BT_Extend: { + /* Ascii extended characters. This is the full byte range. Dependent + * on signed, vs no signed. If the alphabet is one byte then just use + * dot fsm. */ + if ( isSigned ) { + retFsm = new FsmAp(); + retFsm->rangeFsm( -128, 127 ); + } + else { + retFsm = new FsmAp(); + retFsm->rangeFsm( 0, 255 ); + } + break; + } + case BT_Alpha: { + /* Alpha [A-Za-z]. */ + FsmAp *upper = new FsmAp(), *lower = new FsmAp(); + upper->rangeFsm( 'A', 'Z' ); + lower->rangeFsm( 'a', 'z' ); + upper->unionOp( lower ); + upper->minimizePartition2(); + retFsm = upper; + break; + } + case BT_Digit: { + /* Digits [0-9]. */ + retFsm = new FsmAp(); + retFsm->rangeFsm( '0', '9' ); + break; + } + case BT_Alnum: { + /* Alpha numerics [0-9A-Za-z]. 
*/ + FsmAp *digit = new FsmAp(), *lower = new FsmAp(); + FsmAp *upper = new FsmAp(); + digit->rangeFsm( '0', '9' ); + upper->rangeFsm( 'A', 'Z' ); + lower->rangeFsm( 'a', 'z' ); + digit->unionOp( upper ); + digit->unionOp( lower ); + digit->minimizePartition2(); + retFsm = digit; + break; + } + case BT_Lower: { + /* Lower case characters. */ + retFsm = new FsmAp(); + retFsm->rangeFsm( 'a', 'z' ); + break; + } + case BT_Upper: { + /* Upper case characters. */ + retFsm = new FsmAp(); + retFsm->rangeFsm( 'A', 'Z' ); + break; + } + case BT_Cntrl: { + /* Control characters. */ + FsmAp *cntrl = new FsmAp(); + FsmAp *highChar = new FsmAp(); + cntrl->rangeFsm( 0, 31 ); + highChar->concatFsm( 127 ); + cntrl->unionOp( highChar ); + cntrl->minimizePartition2(); + retFsm = cntrl; + break; + } + case BT_Graph: { + /* Graphical ascii characters [!-~]. */ + retFsm = new FsmAp(); + retFsm->rangeFsm( '!', '~' ); + break; + } + case BT_Print: { + /* Printable characters. Same as graph except includes space. */ + retFsm = new FsmAp(); + retFsm->rangeFsm( ' ', '~' ); + break; + } + case BT_Punct: { + /* Punctuation. */ + FsmAp *range1 = new FsmAp(); + FsmAp *range2 = new FsmAp(); + FsmAp *range3 = new FsmAp(); + FsmAp *range4 = new FsmAp(); + range1->rangeFsm( '!', '/' ); + range2->rangeFsm( ':', '@' ); + range3->rangeFsm( '[', '`' ); + range4->rangeFsm( '{', '~' ); + range1->unionOp( range2 ); + range1->unionOp( range3 ); + range1->unionOp( range4 ); + range1->minimizePartition2(); + retFsm = range1; + break; + } + case BT_Space: { + /* Whitespace: [\t\v\f\n\r ]. */ + FsmAp *cntrl = new FsmAp(); + FsmAp *space = new FsmAp(); + cntrl->rangeFsm( '\t', '\r' ); + space->concatFsm( ' ' ); + cntrl->unionOp( space ); + cntrl->minimizePartition2(); + retFsm = cntrl; + break; + } + case BT_Xdigit: { + /* Hex digits [0-9A-Fa-f]. 
*/ + FsmAp *digit = new FsmAp(); + FsmAp *upper = new FsmAp(); + FsmAp *lower = new FsmAp(); + digit->rangeFsm( '0', '9' ); + upper->rangeFsm( 'A', 'F' ); + lower->rangeFsm( 'a', 'f' ); + digit->unionOp( upper ); + digit->unionOp( lower ); + digit->minimizePartition2(); + retFsm = digit; + break; + } + case BT_Lambda: { + retFsm = new FsmAp(); + retFsm->lambdaFsm(); + break; + } + case BT_Empty: { + retFsm = new FsmAp(); + retFsm->emptyFsm(); + break; + }} + + return retFsm; +} + +/* Check if this name inst or any name inst below is referenced. */ +bool NameInst::anyRefsRec() +{ + if ( numRefs > 0 ) + return true; + + /* Recurse on children until true. */ + for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) { + if ( (*ch)->anyRefsRec() ) + return true; + } + + return false; +} + +/* + * ParseData + */ + +/* Initialize the structure that will collect info during the parse of a + * machine. */ +ParseData::ParseData( char *fileName, char *sectionName, + const InputLoc §ionLoc ) +: + sectionGraph(0), + generatingSectionSubset(false), + nextPriorKey(0), + /* 0 is reserved for global error actions. */ + nextLocalErrKey(1), + nextNameId(0), + alphTypeSet(false), + getKeyExpr(0), + accessExpr(0), + curStateExpr(0), + lowerNum(0), + upperNum(0), + fileName(fileName), + sectionName(sectionName), + sectionLoc(sectionLoc), + errorCount(0), + curActionOrd(0), + curPriorOrd(0), + rootName(0), + nextEpsilonResolvedLink(0), + nextLongestMatchId(1), + lmRequiresErrorState(false) +{ + /* Initialize the dictionary of graphs. This is our symbol table. The + * initialization needs to be done on construction which happens at the + * beginning of a machine spec so any assignment operators can reference + * the builtins. */ + initGraphDict(); +} + +/* Clean up the data collected during a parse. */ +ParseData::~ParseData() +{ + /* Delete all the nodes in the action list. Will cause all the + * string data that represents the actions to be deallocated. 
*/ + actionList.empty(); +} + +/* Make a name id in the current name instantiation scope if it is not + * already there. */ +NameInst *ParseData::addNameInst( const InputLoc &loc, char *data, bool isLabel ) +{ + /* Create the name instantitaion object and insert it. */ + NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel ); + curNameInst->childVect.append( newNameInst ); + if ( data != 0 ) + curNameInst->children.insertMulti( data, newNameInst ); + return newNameInst; +} + +void ParseData::initNameWalk() +{ + curNameInst = rootName; + curNameChild = 0; +} + +/* Goes into the next child scope. The number of the child is already set up. + * We need this for the syncronous name tree and parse tree walk to work + * properly. It is reset on entry into a scope and advanced on poping of a + * scope. A call to enterNameScope should be accompanied by a corresponding + * popNameScope. */ +NameFrame ParseData::enterNameScope( bool isLocal, int numScopes ) +{ + /* Save off the current data. */ + NameFrame retFrame; + retFrame.prevNameInst = curNameInst; + retFrame.prevNameChild = curNameChild; + retFrame.prevLocalScope = localNameScope; + + /* Enter into the new name scope. */ + for ( int i = 0; i < numScopes; i++ ) { + curNameInst = curNameInst->childVect[curNameChild]; + curNameChild = 0; + } + + if ( isLocal ) + localNameScope = curNameInst; + + return retFrame; +} + +/* Return from a child scope to a parent. The parent info must be specified as + * an argument and is obtained from the corresponding call to enterNameScope. + * */ +void ParseData::popNameScope( const NameFrame &frame ) +{ + /* Pop the name scope. */ + curNameInst = frame.prevNameInst; + curNameChild = frame.prevNameChild+1; + localNameScope = frame.prevLocalScope; +} + +void ParseData::resetNameScope( const NameFrame &frame ) +{ + /* Pop the name scope. 
*/ + curNameInst = frame.prevNameInst; + curNameChild = frame.prevNameChild; + localNameScope = frame.prevLocalScope; +} + + +void ParseData::unsetObsoleteEntries( FsmAp *graph ) +{ + /* Loop the reference names and increment the usage. Names that are no + * longer needed will be unset in graph. */ + for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) { + /* Get the name. */ + NameInst *name = *ref; + name->numUses += 1; + + /* If the name is no longer needed unset its corresponding entry. */ + if ( name->numUses == name->numRefs ) { + assert( graph->entryPoints.find( name->id ) != 0 ); + graph->unsetEntry( name->id ); + } + } +} + +NameSet ParseData::resolvePart( NameInst *refFrom, char *data, bool recLabelsOnly ) +{ + /* Queue needed for breadth-first search, load it with the start node. */ + NameInstList nameQueue; + nameQueue.append( refFrom ); + + NameSet result; + while ( nameQueue.length() > 0 ) { + /* Pull the next from location off the queue. */ + NameInst *from = nameQueue.detachFirst(); + + /* Look for the name. */ + NameMapEl *low, *high; + if ( from->children.findMulti( data, low, high ) ) { + /* Record all instances of the name. */ + for ( ; low <= high; low++ ) + result.insert( low->value ); + } + + /* Name not there, do breadth-first operation of appending all + * childrent to the processing queue. */ + for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) { + if ( !recLabelsOnly || (*name)->isLabel ) + nameQueue.append( *name ); + } + } + + /* Queue exhausted and name never found. */ + return result; +} + +void ParseData::resolveFrom( NameSet &result, NameInst *refFrom, + const NameRef &nameRef, int namePos ) +{ + /* Look for the name in the owning scope of the factor with aug. */ + NameSet partResult = resolvePart( refFrom, nameRef[namePos], false ); + + /* If there are more parts to the name then continue on. 
*/ + if ( ++namePos < nameRef.length() ) { + /* There are more components to the name, search using all the part + * results as the base. */ + for ( NameSet::Iter name = partResult; name.lte(); name++ ) + resolveFrom( result, *name, nameRef, namePos ); + } + else { + /* This is the last component, append the part results to the final + * results. */ + result.insert( partResult ); + } +} + +/* Write out a name reference. */ +ostream &operator<<( ostream &out, const NameRef &nameRef ) +{ + int pos = 0; + if ( nameRef[pos] == 0 ) { + out << "::"; + pos += 1; + } + out << nameRef[pos++]; + for ( ; pos < nameRef.length(); pos++ ) + out << "::" << nameRef[pos]; + return out; +} + +ostream &operator<<( ostream &out, const NameInst &nameInst ) +{ + /* Count the number fully qualified name parts. */ + int numParents = 0; + NameInst *curParent = nameInst.parent; + while ( curParent != 0 ) { + numParents += 1; + curParent = curParent->parent; + } + + /* Make an array and fill it in. */ + curParent = nameInst.parent; + NameInst **parents = new NameInst*[numParents]; + for ( int p = numParents-1; p >= 0; p-- ) { + parents[p] = curParent; + curParent = curParent->parent; + } + + /* Write the parents out, skip the root. */ + for ( int p = 1; p < numParents; p++ ) + out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" ); + + /* Write the name and cleanup. */ + out << "::" << ( nameInst.name != 0 ? 
nameInst.name : "<ANON>" ); + delete[] parents; + return out; +} + +struct CmpNameInstLoc +{ + static int compare( const NameInst *ni1, const NameInst *ni2 ) + { + if ( ni1->loc.line < ni2->loc.line ) + return -1; + else if ( ni1->loc.line > ni2->loc.line ) + return 1; + else if ( ni1->loc.col < ni2->loc.col ) + return -1; + else if ( ni1->loc.col > ni2->loc.col ) + return 1; + return 0; + } +}; + +void errorStateLabels( const NameSet &resolved ) +{ + MergeSort<NameInst*, CmpNameInstLoc> mergeSort; + mergeSort.sort( resolved.data, resolved.length() ); + for ( NameSet::Iter res = resolved; res.lte(); res++ ) + error((*res)->loc) << " -> " << **res << endl; +} + + +NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action ) +{ + NameInst *nameInst = 0; + + /* Do the local search if the name is not strictly a root level name + * search. */ + if ( nameRef[0] != 0 ) { + /* If the action is referenced, resolve all of them. */ + if ( action != 0 && action->actionRefs.length() > 0 ) { + /* Look for the name in all referencing scopes. */ + NameSet resolved; + for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ ) + resolveFrom( resolved, *actRef, nameRef, 0 ); + + if ( resolved.length() > 0 ) { + /* Take the first one. */ + nameInst = resolved[0]; + if ( resolved.length() > 1 ) { + /* Complain about the multiple references. */ + error(loc) << "state reference " << nameRef << + " resolves to multiple entry points" << endl; + errorStateLabels( resolved ); + } + } + } + } + + /* If not found in the local scope, look in global. */ + if ( nameInst == 0 ) { + NameSet resolved; + int fromPos = nameRef[0] != 0 ? 0 : 1; + resolveFrom( resolved, rootName, nameRef, fromPos ); + + if ( resolved.length() > 0 ) { + /* Take the first. */ + nameInst = resolved[0]; + if ( resolved.length() > 1 ) { + /* Complain about the multiple references. 
*/ + error(loc) << "state reference " << nameRef << + " resolves to multiple entry points" << endl; + errorStateLabels( resolved ); + } + } + } + + if ( nameInst == 0 ) { + /* If not found then complain. */ + error(loc) << "could not resolve state reference " << nameRef << endl; + } + return nameInst; +} + +void ParseData::resolveNameRefs( InlineList *inlineList, Action *action ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Entry: case InlineItem::Goto: + case InlineItem::Call: case InlineItem::Next: { + /* Resolve, pass action for local search. */ + NameInst *target = resolveStateRef( *item->nameRef, item->loc, action ); + + /* Check if the target goes into a longest match. */ + NameInst *search = target->parent; + while ( search != 0 ) { + if ( search->isLongestMatch ) { + error(item->loc) << "cannot enter inside a longest " + "match construction as an entry point" << endl; + break; + } + search = search->parent; + } + + /* Note the reference in the name. This will cause the entry + * point to survive to the end of the graph generating walk. */ + if ( target != 0 ) + target->numRefs += 1; + item->nameTarg = target; + break; + } + default: + break; + } + + /* Some of the item types may have children. */ + if ( item->children != 0 ) + resolveNameRefs( item->children, action ); + } +} + +/* Resolve references to labels in actions. */ +void ParseData::resolveActionNameRefs() +{ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) { + /* Only care about the actions that are referenced. */ + if ( act->actionRefs.length() > 0 ) + resolveNameRefs( act->inlineList, act ); + } +} + +/* Walk a name tree starting at from and fill the name index. */ +void ParseData::fillNameIndex( NameInst *from ) +{ + /* Fill the value for from in the name index. */ + nameIndex[from->id] = from; + + /* Recurse on the implicit final state and then all children. 
*/ + if ( from->final != 0 ) + fillNameIndex( from->final ); + for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) + fillNameIndex( *name ); +} + +void ParseData::makeRootName() +{ + /* Create the root name. */ + rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false ); +} + +/* Build the name tree and supporting data structures. */ +void ParseData::makeNameTree( GraphDictEl *dictEl ) +{ + /* Set up curNameInst for the walk. */ + curNameInst = rootName; + curNameChild = 0; + + if ( dictEl != 0 ) { + /* A start location has been specified. */ + dictEl->value->makeNameTree( dictEl->loc, this ); + } + else { + /* First make the name tree. */ + for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) { + /* Recurse on the instance. */ + glel->value->makeNameTree( glel->loc, this ); + } + } + + /* The number of nodes in the tree can now be given by nextNameId */ + nameIndex = new NameInst*[nextNameId]; + memset( nameIndex, 0, sizeof(NameInst*)*nextNameId ); + fillNameIndex( rootName ); +} + +void ParseData::createBuiltin( char *name, BuiltinMachine builtin ) +{ + Expression *expression = new Expression( builtin ); + Join *join = new Join( expression ); + JoinOrLm *joinOrLm = new JoinOrLm( join ); + VarDef *varDef = new VarDef( name, joinOrLm ); + GraphDictEl *graphDictEl = new GraphDictEl( name, varDef ); + graphDict.insert( graphDictEl ); +} + +/* Initialize the graph dict with builtin types. 
*/ +void ParseData::initGraphDict( ) +{ + createBuiltin( "any", BT_Any ); + createBuiltin( "ascii", BT_Ascii ); + createBuiltin( "extend", BT_Extend ); + createBuiltin( "alpha", BT_Alpha ); + createBuiltin( "digit", BT_Digit ); + createBuiltin( "alnum", BT_Alnum ); + createBuiltin( "lower", BT_Lower ); + createBuiltin( "upper", BT_Upper ); + createBuiltin( "cntrl", BT_Cntrl ); + createBuiltin( "graph", BT_Graph ); + createBuiltin( "print", BT_Print ); + createBuiltin( "punct", BT_Punct ); + createBuiltin( "space", BT_Space ); + createBuiltin( "xdigit", BT_Xdigit ); + createBuiltin( "null", BT_Lambda ); + createBuiltin( "zlen", BT_Lambda ); + createBuiltin( "empty", BT_Empty ); +} + +/* Set the alphabet type. If the types are not valid returns false. */ +bool ParseData::setAlphType( char *s1, char *s2 ) +{ + bool valid = false; + for ( int i = 0; i < hostLang->numHostTypes; i++ ) { + if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 && + hostLang->hostTypes[i].data2 != 0 && + strcmp( s2, hostLang->hostTypes[i].data2 ) == 0 ) + { + valid = true; + userAlphType = hostLang->hostTypes + i; + break; + } + } + + alphTypeSet = true; + return valid; +} + +/* Set the alphabet type. If the types are not valid returns false. */ +bool ParseData::setAlphType( char *s1 ) +{ + bool valid = false; + for ( int i = 0; i < hostLang->numHostTypes; i++ ) { + if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 && + hostLang->hostTypes[i].data2 == 0 ) + { + valid = true; + userAlphType = hostLang->hostTypes + i; + break; + } + } + + alphTypeSet = true; + return valid; +} + +/* Initialize the key operators object that will be referenced by all fsms + * created. */ +void ParseData::initKeyOps( ) +{ + /* Signedness and bounds. */ + HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType; + thisKeyOps.setAlphType( alphType ); + + if ( lowerNum != 0 ) { + /* If ranges are given then interpret the alphabet type. 
*/ + thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this ); + thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this ); + } + + thisCondData.nextCondKey = thisKeyOps.maxKey; + thisCondData.nextCondKey.increment(); +} + +void ParseData::printNameInst( NameInst *nameInst, int level ) +{ + for ( int i = 0; i < level; i++ ) + cerr << " "; + cerr << (nameInst->name != 0 ? nameInst->name : "<ANON>") << + " id: " << nameInst->id << + " refs: " << nameInst->numRefs << endl; + for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ ) + printNameInst( *name, level+1 ); +} + +/* Remove duplicates of unique actions from an action table. */ +void ParseData::removeDups( ActionTable &table ) +{ + /* Scan through the table looking for unique actions to + * remove duplicates of. */ + for ( int i = 0; i < table.length(); i++ ) { + /* Remove any duplicates ahead of i. */ + for ( int r = i+1; r < table.length(); ) { + if ( table[r].value == table[i].value ) + table.vremove(r); + else + r += 1; + } + } +} + +/* Remove duplicates from action lists. This operates only on transition and + * eof action lists and so should be called once all actions have been + * transfered to their final resting place. */ +void ParseData::removeActionDups( FsmAp *graph ) +{ + /* Loop all states. */ + for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) { + /* Loop all transitions. 
*/ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) + removeDups( trans->actionTable ); + removeDups( state->toStateActionTable ); + removeDups( state->fromStateActionTable ); + removeDups( state->eofActionTable ); + } +} + +Action *ParseData::newAction( char *name, InlineList *inlineList ) +{ + InputLoc loc; + loc.line = 1; + loc.col = 1; + + Action *action = new Action( loc, name, inlineList ); + action->actionRefs.append( rootName ); + actionList.append( action ); + return action; +} + +void ParseData::initLongestMatchData() +{ + if ( lmList.length() > 0 ) { + /* The initTokStart action resets the token start. */ + InlineList *il1 = new InlineList; + il1->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) ); + initTokStart = newAction( "initts", il1 ); + initTokStart->isLmAction = true; + + /* The initActId action gives act a default value. */ + InlineList *il4 = new InlineList; + il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) ); + initActId = newAction( "initact", il4 ); + initActId->isLmAction = true; + + /* The setTokStart action sets tokstart. */ + InlineList *il5 = new InlineList; + il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) ); + setTokStart = newAction( "tokstart", il5 ); + setTokStart->isLmAction = true; + + /* The setTokEnd action sets tokend. */ + InlineList *il3 = new InlineList; + il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) ); + setTokEnd = newAction( "tokend", il3 ); + setTokEnd->isLmAction = true; + + /* The action will also need an ordering: ahead of all user action + * embeddings. */ + initTokStartOrd = curActionOrd++; + initActIdOrd = curActionOrd++; + setTokStartOrd = curActionOrd++; + setTokEndOrd = curActionOrd++; + } +} + +/* After building the graph, do some extra processing to ensure the runtime + * data of the longest mactch operators is consistent. 
*/ +void ParseData::setLongestMatchData( FsmAp *graph ) +{ + if ( lmList.length() > 0 ) { + /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry) + * init the tokstart. */ + for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) { + /* This is run after duplicates are removed, we must guard against + * inserting a duplicate. */ + ActionTable &actionTable = en->value->toStateActionTable; + if ( ! actionTable.hasAction( initTokStart ) ) + actionTable.setAction( initTokStartOrd, initTokStart ); + } + + /* Find the set of states that are the target of transitions with + * actions that have calls. These states will be targeted by fret + * statements. */ + StateSet states; + for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) { + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + for ( ActionTable::Iter ati = trans->actionTable; ati.lte(); ati++ ) { + if ( ati->value->anyCall && trans->toState != 0 ) + states.insert( trans->toState ); + } + } + } + + + /* Init tokstart upon entering the above collected states. */ + for ( StateSet::Iter ps = states; ps.lte(); ps++ ) { + /* This is run after duplicates are removed, we must guard against + * inserting a duplicate. */ + ActionTable &actionTable = (*ps)->toStateActionTable; + if ( ! actionTable.hasAction( initTokStart ) ) + actionTable.setAction( initTokStartOrd, initTokStart ); + } + } +} + +/* Make the graph from a graph dict node. Does minimization and state sorting. */ +FsmAp *ParseData::makeInstance( GraphDictEl *gdNode ) +{ + /* Build the graph from a walk of the parse tree. */ + FsmAp *graph = gdNode->value->walk( this ); + + /* Resolve any labels that point to multiple states. Any labels that are + * still around are referenced only by gotos and calls and they need to be + * made into deterministic entry points. */ + graph->deterministicEntry(); + + /* + * All state construction is now complete. + */ + + /* Transfer global error actions. 
*/ + for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) + graph->transferErrorActions( state, 0 ); + + removeActionDups( graph ); + + /* Remove unreachable states. There should be no dead end states. The + * subtract and intersection operators are the only places where they may + * be created and those operators clean them up. */ + graph->removeUnreachableStates(); + + /* No more fsm operations are to be done. Action ordering numbers are + * no longer of use and will just hinder minimization. Clear them. */ + graph->nullActionKeys(); + + /* Transition priorities are no longer of use. We can clear them + * because they will just hinder minimization as well. Clear them. */ + graph->clearAllPriorities(); + + if ( minimizeOpt != MinimizeNone ) { + /* Minimize here even if we minimized at every op. Now that function + * keys have been cleared we may get a more minimal fsm. */ + switch ( minimizeLevel ) { + case MinimizeApprox: + graph->minimizeApproximate(); + break; + case MinimizeStable: + graph->minimizeStable(); + break; + case MinimizePartition1: + graph->minimizePartition1(); + break; + case MinimizePartition2: + graph->minimizePartition2(); + break; + } + } + + graph->compressTransitions(); + + return graph; +} + +void ParseData::printNameTree() +{ + /* Print the name instance map. */ + for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ ) + printNameInst( *name, 0 ); + + cerr << "name index:" << endl; + /* Show that the name index is correct. */ + for ( int ni = 0; ni < nextNameId; ni++ ) { + cerr << ni << ": "; + char *name = nameIndex[ni]->name; + cerr << ( name != 0 ? name : "<ANON>" ) << endl; + } +} + +FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode ) +{ + /* Build the name tree and supporting data structures. */ + makeNameTree( gdNode ); + + /* Resove name references from gdNode. */ + initNameWalk(); + gdNode->value->resolveNameRefs( this ); + + /* Do not resolve action references. 
Since we are not building the entire + * graph there's a good chance that many name references will fail. This + * is okay since generating part of the graph is usually only done when + * inspecting the compiled machine. */ + + /* Flag this case so that the XML code generator is aware that we haven't + * looked up name references in actions. It can then avoid segfaulting. */ + generatingSectionSubset = true; + + /* Just building the specified graph. */ + initNameWalk(); + FsmAp *mainGraph = makeInstance( gdNode ); + + return mainGraph; +} + +FsmAp *ParseData::makeAll() +{ + /* Build the name tree and supporting data structures. */ + makeNameTree( 0 ); + + /* Resove name references in the tree. */ + initNameWalk(); + for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) + glel->value->resolveNameRefs( this ); + + /* Resolve action code name references. */ + resolveActionNameRefs(); + + FsmAp *mainGraph = 0; + FsmAp **graphs = new FsmAp*[instanceList.length()]; + int numOthers = 0; + + /* Make all the instantiations, we know that main exists in this list. */ + initNameWalk(); + for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) { + if ( strcmp( glel->key, machineMain ) == 0 ) { + /* Main graph is always instantiated. */ + mainGraph = makeInstance( glel ); + } + else { + /* Check to see if the instance is ever referenced. */ + NameInst *nameInst = nextNameScope(); + if ( nameInst->anyRefsRec() ) + graphs[numOthers++] = makeInstance( glel ); + else { + /* Need to walk over the name tree item. */ + NameFrame nameFrame = enterNameScope( true, 1 ); + popNameScope( nameFrame ); + } + } + } + + if ( numOthers > 0 ) { + /* Add all the other graphs into main. */ + mainGraph->globOp( graphs, numOthers ); + } + + delete[] graphs; + return mainGraph; +} + +void ParseData::analyzeAction( Action *action, InlineList *inlineList ) +{ + /* FIXME: Actions used as conditions should be very constrained. 
*/ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr ) + action->anyCall = true; + + /* Need to recurse into longest match items. */ + if ( item->type == InlineItem::LmSwitch ) { + LongestMatch *lm = item->longestMatch; + for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) { + if ( lmi->action != 0 ) + analyzeAction( action, lmi->action->inlineList ); + } + } + + if ( item->type == InlineItem::LmOnLast || + item->type == InlineItem::LmOnNext || + item->type == InlineItem::LmOnLagBehind ) + { + LongestMatchPart *lmi = item->longestMatchPart; + if ( lmi->action != 0 ) + analyzeAction( action, lmi->action->inlineList ); + } + + if ( item->children != 0 ) + analyzeAction( action, item->children ); + } +} + + +/* Check actions for bad uses of fsm directives. We don't go inside longest + * match items in actions created by ragel, since we just want the user + * actions. */ +void ParseData::checkInlineList( Action *act, InlineList *inlineList ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + /* EOF checks. 
*/ + if ( act->numEofRefs > 0 ) { + switch ( item->type ) { + case InlineItem::PChar: + error(item->loc) << "pointer to current element does not exist in " + "EOF action code" << endl; + break; + case InlineItem::Char: + error(item->loc) << "current element does not exist in " + "EOF action code" << endl; + break; + case InlineItem::Hold: + error(item->loc) << "changing the current element not possible in " + "EOF action code" << endl; + break; + case InlineItem::Exec: + error(item->loc) << "changing the current element not possible in " + "EOF action code" << endl; + break; + case InlineItem::Goto: case InlineItem::Call: + case InlineItem::Next: case InlineItem::GotoExpr: + case InlineItem::CallExpr: case InlineItem::NextExpr: + case InlineItem::Ret: + error(item->loc) << "changing the current state not possible in " + "EOF action code" << endl; + break; + default: + break; + } + } + + /* Recurse. */ + if ( item->children != 0 ) + checkInlineList( act, item->children ); + } +} + +void ParseData::checkAction( Action *action ) +{ + /* Check for actions with calls that are embedded within a longest match + * machine. */ + if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) { + for ( ActionRefs::Iter ar = action->actionRefs; ar.lte(); ar++ ) { + NameInst *check = *ar; + while ( check != 0 ) { + if ( check->isLongestMatch ) { + error(action->loc) << "within a scanner, fcall is permitted" + " only in pattern actions" << endl; + break; + } + check = check->parent; + } + } + } + + checkInlineList( action, action->inlineList ); +} + + +void ParseData::analyzeGraph( FsmAp *graph ) +{ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) + analyzeAction( act, act->inlineList ); + + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + /* The transition list. 
*/ + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ ) + at->value->numTransRefs += 1; + } + + for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ ) + at->value->numToStateRefs += 1; + + for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ ) + at->value->numFromStateRefs += 1; + + for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ ) + at->value->numEofRefs += 1; + + for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) { + for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ ) + (*sci)->numCondRefs += 1; + } + } + + /* Checks for bad usage of directives in action code. */ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) + checkAction( act ); +} + +void ParseData::prepareMachineGen( GraphDictEl *graphDictEl ) +{ + beginProcessing(); + initKeyOps(); + makeRootName(); + initLongestMatchData(); + + /* Make the graph, do minimization. */ + if ( graphDictEl == 0 ) + sectionGraph = makeAll(); + else + sectionGraph = makeSpecific( graphDictEl ); + + /* If any errors have occured in the input file then don't write anything. */ + if ( gblErrorCount > 0 ) + return; + + analyzeGraph( sectionGraph ); + + /* Depends on the graph analysis. */ + setLongestMatchData( sectionGraph ); +} + +void ParseData::generateXML( ostream &out ) +{ + beginProcessing(); + + /* Make the generator. */ + XMLCodeGen codeGen( sectionName, this, sectionGraph, out ); + + /* Write out with it. */ + codeGen.writeXML(); + + if ( printStatistics ) { + cerr << "fsm name : " << sectionName << endl; + cerr << "num states: " << sectionGraph->stateList.length() << endl; + cerr << endl; + } +} + +/* Send eof to all parsers. */ +void terminateAllParsers( ) +{ + /* FIXME: a proper token is needed here. Suppose we should use the + * location of EOF in the last file that the parser was referenced in. 
*/ + InputLoc loc; + loc.fileName = "<EOF>"; + loc.line = 0; + loc.col = 0; + for ( ParserDict::Iter pdel = parserDict; pdel.lte(); pdel++ ) + pdel->value->token( loc, _eof, 0, 0 ); +} + +void checkMachines( ) +{ + for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) { + ParseData *pd = parser->value->pd; + if ( pd->instanceList.length() > 0 ) { + /* There must be a main graph defined. */ + /* No machine name. Need to have a main. Make sure it was given. */ + GraphDictEl *mainEl = pd->graphDict.find( machineMain ); + if ( mainEl == 0 ) { + error(pd->sectionLoc) << "main graph not defined in \"" << + pd->sectionName << "\"" << endl; + } + } + } +} + +void writeLanguage( std::ostream &out ) +{ + out << " lang=\""; + switch ( hostLangType ) { + case CCode: out << "C"; break; + case DCode: out << "D"; break; + case JavaCode: out << "Java"; break; + } + out << "\""; + +} + +void writeMachines( std::ostream &out, std::string hostData, char *inputFileName ) +{ + if ( machineSpec == 0 && machineName == 0 ) { + /* No machine spec or machine name given. Generate everything. */ + for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) { + ParseData *pd = parser->value->pd; + if ( pd->instanceList.length() > 0 ) + pd->prepareMachineGen( 0 ); + } + + if ( gblErrorCount == 0 ) { + out << "<ragel filename=\"" << inputFileName << "\""; + writeLanguage( out ); + out << ">\n"; + for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) { + ParseData *pd = parser->value->pd; + if ( pd->instanceList.length() > 0 ) + pd->generateXML( out ); + } + out << hostData; + out << "</ragel>\n"; + } + } + else if ( parserDict.length() > 0 ) { + /* There is either a machine spec or machine name given. */ + ParseData *parseData = 0; + GraphDictEl *graphDictEl = 0; + + /* Traverse the sections, break out when we find a section/machine + * that matches the one specified. 
*/ + for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) { + ParseData *checkPd = parser->value->pd; + if ( machineSpec == 0 || strcmp( checkPd->sectionName, machineSpec ) == 0 ) { + GraphDictEl *checkGdEl = 0; + if ( machineName == 0 || (checkGdEl = + checkPd->graphDict.find( machineName )) != 0 ) + { + /* Have a machine spec and/or machine name that matches + * the -M/-S options. */ + parseData = checkPd; + graphDictEl = checkGdEl; + break; + } + } + } + + if ( parseData == 0 ) + error() << "could not locate machine specified with -S and/or -M" << endl; + else { + /* Section/Machine to emit was found. Prepare and emit it. */ + parseData->prepareMachineGen( graphDictEl ); + if ( gblErrorCount == 0 ) { + out << "<ragel filename=\"" << inputFileName << "\""; + writeLanguage( out ); + out << ">\n"; + parseData->generateXML( out ); + out << hostData; + out << "</ragel>\n"; + } + } + } +} diff --git a/ragel/parsedata.h b/ragel/parsedata.h new file mode 100644 index 0000000..a856257 --- /dev/null +++ b/ragel/parsedata.h @@ -0,0 +1,463 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _PARSEDATA_H +#define _PARSEDATA_H + +#include <iostream> +#include <limits.h> +#include "avlmap.h" +#include "bstmap.h" +#include "vector.h" +#include "dlist.h" +#include "fsmgraph.h" +#include "compare.h" +#include "vector.h" +#include "common.h" +#include "parsetree.h" + +/* Forwards. */ +using std::ostream; + +/* Nodes in the tree that use this action. */ +typedef Vector<NameInst*> ActionRefs; + +/* Element in list of actions. Contains the string for the code to exectute. */ +struct Action +: + public DListEl<Action>, + public AvlTreeEl<Action> +{ +public: + + Action( const InputLoc &loc, char *name, InlineList *inlineList ) + : + loc(loc), + name(name), + inlineList(inlineList), + actionId(-1), + numTransRefs(0), + numToStateRefs(0), + numFromStateRefs(0), + numEofRefs(0), + numCondRefs(0), + anyCall(false), + isLmAction(false) + { + } + + /* Key for action dictionary. */ + char *getKey() const { return name; } + + /* Data collected during parse. */ + InputLoc loc; + char *name; + InlineList *inlineList; + int actionId; + + void actionName( ostream &out ) + { + if ( name != 0 ) + out << name; + else + out << loc.line << ":" << loc.col; + } + + /* Places in the input text that reference the action. */ + ActionRefs actionRefs; + + /* Number of references in the final machine. */ + bool numRefs() + { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; } + int numTransRefs; + int numToStateRefs; + int numFromStateRefs; + int numEofRefs; + int numCondRefs; + bool anyCall; + + bool isLmAction; +}; + +/* A list of actions. */ +typedef DList<Action> ActionList; +typedef AvlTree<Action, char *, CmpStr> ActionDict; + +/* Structure for reverse action mapping. 
*/
struct RevActionMapEl
{
	char *name;
	InputLoc location;
};

/* Parse tree node types, declared here so they can be used by pointer. */
struct VarDef;
struct Join;
struct Expression;
struct Term;
struct FactorWithAug;
struct FactorWithLabel;
struct FactorWithRep;
struct FactorWithNeg;
struct Factor;
struct Literal;
struct Range;
struct RegExpr;
struct ReItem;
struct ReOrBlock;
struct ReOrItem;
struct LongestMatch;
typedef DList<LongestMatch> LmList;

/* Graph dictionary. An element is both a tree element (GraphDict) and a
 * list element (GraphList). */
struct GraphDictEl
:
	public AvlTreeEl<GraphDictEl>,
	public DListEl<GraphDictEl>
{
	/* Key only; the definition pointer starts out null. */
	GraphDictEl( char *k )
		: key(k), value(0), isInstance(false) { }
	/* Key plus its definition. isInstance starts out false in both forms. */
	GraphDictEl( char *k, VarDef *value )
		: key(k), value(value), isInstance(false) { }

	const char *getKey() { return key; }

	char *key;
	VarDef *value;
	bool isInstance;

	/* Location info of graph definition. Points to variable name of assignment. */
	InputLoc loc;
};

typedef AvlTree<GraphDictEl, char*, CmpStr> GraphDict;
typedef DList<GraphDictEl> GraphList;

/* Priority name dictionary. */
typedef AvlMapEl<char*, int> PriorDictEl;
typedef AvlMap<char*, int, CmpStr> PriorDict;

/* Local error name dictionary. */
typedef AvlMapEl<char*, int> LocalErrDictEl;
typedef AvlMap<char*, int, CmpStr> LocalErrDict;

/* Tree of instantiated names. */
typedef BstMapEl<char*, NameInst*> NameMapEl;
typedef BstMap<char*, NameInst*, CmpStr> NameMap;
typedef Vector<NameInst*> NameVect;
typedef BstSet<NameInst*> NameSet;

/* Node in the tree of instantiated names. */
struct NameInst
{
	/* Counters start at zero, start/final at null, isLongestMatch false. */
	NameInst( const InputLoc &loc, NameInst *parent, char *name, int id, bool isLabel ) :
		loc(loc), parent(parent), name(name), id(id), isLabel(isLabel),
		isLongestMatch(false), numRefs(0), numUses(0), start(0), final(0) {}

	InputLoc loc;

	/* Keep parent pointers in the name tree to retrieve
	 * fully qualified names.
*/ + NameInst *parent; + + char *name; + int id; + bool isLabel; + bool isLongestMatch; + + int numRefs; + int numUses; + + /* Names underneath us, excludes anonymous names. */ + NameMap children; + + /* All names underneath us in order of appearance. */ + NameVect childVect; + + /* Join scopes need an implicit "final" target. */ + NameInst *start, *final; + + /* During a fsm generation walk, lists the names that are referenced by + * epsilon operations in the current scope. After the link is made by the + * epsilon reference and the join operation is complete, the label can + * have its refcount decremented. Once there are no more references the + * entry point can be removed from the fsm returned. */ + NameVect referencedNames; + + /* Pointers for the name search queue. */ + NameInst *prev, *next; + + /* Check if this name inst or any name inst below is referenced. */ + bool anyRefsRec(); +}; + +typedef DList<NameInst> NameInstList; + +/* Stack frame used in walking the name tree. */ +struct NameFrame +{ + NameInst *prevNameInst; + int prevNameChild; + NameInst *prevLocalScope; +}; + +/* Class to collect information about the machine during the + * parse of input. */ +struct ParseData +{ + /* Create a new parse data object. This is done at the beginning of every + * fsm specification. */ + ParseData( char *fileName, char *sectionName, const InputLoc §ionLoc ); + ~ParseData(); + + /* + * Setting up the graph dict. + */ + + /* Initialize a graph dict with the basic fsms. */ + void initGraphDict(); + void createBuiltin( char *name, BuiltinMachine builtin ); + + /* Make a name id in the current name instantiation scope if it is not + * already there. */ + NameInst *addNameInst( const InputLoc &loc, char *data, bool isLabel ); + void makeRootName(); + void makeNameTree( GraphDictEl *gdNode ); + void fillNameIndex( NameInst *from ); + void printNameTree(); + + /* Increments the usage count on entry names. 
Names that are no longer + * needed will have their entry points unset. */ + void unsetObsoleteEntries( FsmAp *graph ); + + /* Resove name references in action code and epsilon transitions. */ + NameSet resolvePart( NameInst *refFrom, char *data, bool recLabelsOnly ); + void resolveFrom( NameSet &result, NameInst *refFrom, + const NameRef &nameRef, int namePos ); + NameInst *resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action ); + void resolveNameRefs( InlineList *inlineList, Action *action ); + void resolveActionNameRefs(); + + /* Set the alphabet type. If type types are not valid returns false. */ + bool setAlphType( char *s1, char *s2 ); + bool setAlphType( char *s1 ); + + /* Unique actions. */ + void removeDups( ActionTable &actionTable ); + void removeActionDups( FsmAp *graph ); + + /* Dumping the name instantiation tree. */ + void printNameInst( NameInst *nameInst, int level ); + + /* Make the graph from a graph dict node. Does minimization. */ + FsmAp *makeInstance( GraphDictEl *gdNode ); + FsmAp *makeSpecific( GraphDictEl *gdNode ); + FsmAp *makeAll(); + + /* Checking the contents of actions. */ + void checkAction( Action *action ); + void checkInlineList( Action *act, InlineList *inlineList ); + + void analyzeAction( Action *action, InlineList *inlineList ); + void analyzeGraph( FsmAp *graph ); + + void prepareMachineGen( GraphDictEl *graphDictEl ); + void generateXML( ostream &out ); + FsmAp *sectionGraph; + bool generatingSectionSubset; + + void initKeyOps(); + + /* + * Data collected during the parse. + */ + + /* Dictionary of graphs. Both instances and non-instances go here. */ + GraphDict graphDict; + + /* The list of instances. */ + GraphList instanceList; + + /* Dictionary of actions. Lets actions be defined and then referenced. */ + ActionDict actionDict; + + /* Dictionary of named priorities. */ + PriorDict priorDict; + + /* Dictionary of named local errors. */ + LocalErrDict localErrDict; + + /* List of actions. 
Will be pasted into a switch statement. */ + ActionList actionList; + + /* The id of the next priority name and label. */ + int nextPriorKey, nextLocalErrKey, nextNameId; + + /* The default priority number key for a machine. This is active during + * the parse of the rhs of a machine assignment. */ + int curDefPriorKey; + + int curDefLocalErrKey; + + /* Alphabet type. */ + HostType *userAlphType; + bool alphTypeSet; + + /* Element type and get key expression. */ + InlineList *getKeyExpr; + InlineList *accessExpr; + InlineList *curStateExpr; + + /* The alphabet range. */ + char *lowerNum, *upperNum; + Key lowKey, highKey; + InputLoc rangeLowLoc, rangeHighLoc; + + /* The name of the file the fsm is from, and the spec name. */ + char *fileName; + char *sectionName; + InputLoc sectionLoc; + + /* Number of errors encountered parsing the fsm spec. */ + int errorCount; + + /* Counting the action and priority ordering. */ + int curActionOrd; + int curPriorOrd; + + /* Root of the name tree. */ + NameInst *rootName; + NameInst *curNameInst; + int curNameChild; + + /* The place where resolved epsilon transitions go. These cannot go into + * the parse tree because a single epsilon op can resolve more than once + * to different nameInsts if the machine it's in is used more than once. */ + NameVect epsilonResolvedLinks; + int nextEpsilonResolvedLink; + + /* Root of the name tree used for doing local name searches. */ + NameInst *localNameScope; + + void setLmInRetLoc( InlineList *inlineList ); + void initLongestMatchData(); + void setLongestMatchData( FsmAp *graph ); + void initNameWalk(); + NameInst *nextNameScope() { return curNameInst->childVect[curNameChild]; } + NameFrame enterNameScope( bool isLocal, int numScopes ); + void popNameScope( const NameFrame &frame ); + void resetNameScope( const NameFrame &frame ); + + /* Make name ids to name inst pointers. */ + NameInst **nameIndex; + + /* Counter for assigning ids to longest match items. 
*/ + int nextLongestMatchId; + bool lmRequiresErrorState; + + /* List of all longest match parse tree items. */ + LmList lmList; + + Action *newAction( char *name, InlineList *inlineList ); + + Action *initTokStart; + int initTokStartOrd; + + Action *setTokStart; + int setTokStartOrd; + + Action *initActId; + int initActIdOrd; + + Action *setTokEnd; + int setTokEndOrd; + + void beginProcessing() + { + ::condData = &thisCondData; + ::keyOps = &thisKeyOps; + } + + CondData thisCondData; + KeyOps thisKeyOps; +}; + +void afterOpMinimize( FsmAp *fsm, bool lastInSeq = true ); +Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd ); +Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd ); +Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd ); +Key makeFsmKeyChar( char c, ParseData *pd ); +void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd ); +void makeFsmUniqueKeyArray( KeySet &result, char *data, int len, + bool caseInsensitive, ParseData *pd ); +FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd ); +FsmAp *dotFsm( ParseData *pd ); +FsmAp *dotStarFsm( ParseData *pd ); + +void errorStateLabels( const NameSet &locations ); + +/* Data used by the parser specific to the current file. Supports the include + * system, since a new parser is executed for each included file. */ +struct InputData +{ + InputData( char *fileName, char *includeSpec, char *includeTo ) : + pd(0), sectionName(0), defaultParseData(0), + first_line(1), first_column(1), + last_line(1), last_column(0), + fileName(fileName), includeSpec(includeSpec), + includeTo(includeTo), active(true) + {} + + /* For collecting a name references. */ + NameRef nameRef; + NameRefList nameRefList; + + /* The parse data. For each fsm spec, the parser collects things that it parses + * in data structures in here. 
*/
	ParseData *pd;

	char *sectionName;
	ParseData *defaultParseData;

	/* Line/column span; the constructor starts it at 1,1 - 1,0. */
	int first_line;
	int first_column;
	int last_line;
	int last_column;

	char *fileName;

	/* If this is an included file, this contains the specification to search
	 * for. IncludeTo will contain the spec name that does the including. */
	char *includeSpec;
	char *includeTo;

	bool active;
	InputLoc sectionLoc;
};

struct Parser;

/* Parsers keyed by a string. NOTE(review): the key looks like the section
 * name, confirm at the insertion site. */
typedef AvlMap<char*, Parser *, CmpStr> ParserDict;
typedef AvlMapEl<char*, Parser *> ParserDictEl;

extern ParserDict parserDict;


#endif /* _PARSEDATA_H */
diff --git a/ragel/parsetree.cpp b/ragel/parsetree.cpp
new file mode 100644
index 0000000..11c58fa
--- /dev/null
+++ b/ragel/parsetree.cpp
@@ -0,0 +1,2111 @@
/*
 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
 */

/* This file is part of Ragel.
 *
 * Ragel is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Ragel is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Ragel; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <iostream>
#include <iomanip>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>

/* Parsing.
*/ +#include "ragel.h" +#include "rlparse.h" +#include "parsetree.h" + +using namespace std; +ostream &operator<<( ostream &out, const NameRef &nameRef ); +ostream &operator<<( ostream &out, const NameInst &nameInst ); + +/* Convert the literal string which comes in from the scanner into an array of + * characters with escapes and options interpreted. Also null terminates the + * string. Though this null termination should not be relied on for + * interpreting literals in the parser because the string may contain a + * literal string with \0 */ +void Token::prepareLitString( Token &result, bool &caseInsensitive ) +{ + result.data = new char[this->length+1]; + caseInsensitive = false; + + char *src = this->data + 1; + char *end = this->data + this->length - 1; + + while ( *end != '\'' && *end != '\"' ) { + if ( *end == 'i' ) + caseInsensitive = true; + else { + error( this->loc ) << "literal string '" << *end << + "' option not supported" << endl; + } + end -= 1; + } + + char *dest = result.data; + int len = 0; + while ( src != end ) { + if ( *src == '\\' ) { + switch ( src[1] ) { + case '0': dest[len++] = '\0'; break; + case 'a': dest[len++] = '\a'; break; + case 'b': dest[len++] = '\b'; break; + case 't': dest[len++] = '\t'; break; + case 'n': dest[len++] = '\n'; break; + case 'v': dest[len++] = '\v'; break; + case 'f': dest[len++] = '\f'; break; + case 'r': dest[len++] = '\r'; break; + case '\n': break; + default: dest[len++] = src[1]; break; + } + src += 2; + } + else { + dest[len++] = *src++; + } + } + result.length = len; + result.data[result.length] = 0; +} + + +FsmAp *VarDef::walk( ParseData *pd ) +{ + /* We enter into a new name scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Recurse on the expression. */ + FsmAp *rtnVal = joinOrLm->walk( pd ); + + /* Do the tranfer of local error actions. 
*/ + LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name ); + if ( localErrDictEl != 0 ) { + for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ ) + rtnVal->transferErrorActions( state, localErrDictEl->value ); + } + + /* If the expression below is a join operation with multiple expressions + * then it just had epsilon transisions resolved. If it is a join + * with only a single expression then run the epsilon op now. */ + if ( joinOrLm->type == JoinOrLm::JoinType && joinOrLm->join->exprList.length() == 1 ) + rtnVal->epsilonOp(); + + /* We can now unset entry points that are not longer used. */ + pd->unsetObsoleteEntries( rtnVal ); + + /* If the name of the variable is referenced then add the entry point to + * the graph. */ + if ( pd->curNameInst->numRefs > 0 ) + rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState ); + + /* Pop the name scope. */ + pd->popNameScope( nameFrame ); + return rtnVal; +} + +void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd ) +{ + /* The variable definition enters a new scope. */ + NameInst *prevNameInst = pd->curNameInst; + pd->curNameInst = pd->addNameInst( loc, name, false ); + + if ( joinOrLm->type == JoinOrLm::LongestMatchType ) + pd->curNameInst->isLongestMatch = true; + + /* Recurse. */ + joinOrLm->makeNameTree( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->curNameInst = prevNameInst; +} + +void VarDef::resolveNameRefs( ParseData *pd ) +{ + /* Entering into a new scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Recurse. */ + joinOrLm->resolveNameRefs( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->popNameScope( nameFrame ); +} + +InputLoc LongestMatchPart::getLoc() +{ + return action != 0 ? action->loc : semiLoc; +} + +/* + * If there are any LMs then all of the following entry points must reset + * tokstart: + * + * 1. fentry(StateRef) + * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef) + * 3. 
targt of any transition that has an fcall (the return loc). + * 4. start state of all longest match routines. + */ + +Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc, + char *name, InlineList *inlineList ) +{ + Action *action = new Action( loc, name, inlineList ); + action->actionRefs.append( pd->curNameInst ); + pd->actionList.append( action ); + action->isLmAction = true; + return action; +} + +void LongestMatch::makeActions( ParseData *pd ) +{ + /* Make actions that set the action id. */ + for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, InlineItem::LmSetActId ) ); + char *actName = new char[50]; + sprintf( actName, "store%i", lmi->longestMatchId ); + lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + /* Make actions that execute the user action and restart on the last character. */ + for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmOnLast ) ); + char *actName = new char[50]; + sprintf( actName, "imm%i", lmi->longestMatchId ); + lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + /* Make actions that execute the user action and restart on the next + * character. These actions will set tokend themselves (it is the current + * char). */ + for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. 
*/ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmOnNext ) ); + char *actName = new char[50]; + sprintf( actName, "lagh%i", lmi->longestMatchId ); + lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + /* Make actions that execute the user action and restart at tokend. These + * actions execute some time after matching the last char. */ + for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmOnLagBehind ) ); + char *actName = new char[50]; + sprintf( actName, "lag%i", lmi->longestMatchId ); + lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + InputLoc loc; + loc.line = 1; + loc.col = 1; + + /* Create the error action. */ + InlineList *il6 = new InlineList; + il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) ); + lmActSelect = newAction( pd, loc, "lagsel", il6 ); +} + +void LongestMatch::findName( ParseData *pd ) +{ + NameInst *nameInst = pd->curNameInst; + while ( nameInst->name == 0 ) { + nameInst = nameInst->parent; + /* Since every machine must must have a name, we should always find a + * name for the longest match. */ + assert( nameInst != 0 ); + } + name = nameInst->name; +} + +void LongestMatch::makeNameTree( ParseData *pd ) +{ + /* Create an anonymous scope for the longest match. Will be used for + * restarting machine after matching a token. */ + NameInst *prevNameInst = pd->curNameInst; + pd->curNameInst = pd->addNameInst( loc, 0, false ); + + /* Recurse into all parts of the longest match operator. 
*/ + for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) + lmi->join->makeNameTree( pd ); + + /* Traverse the name tree upwards to find a name for this lm. */ + findName( pd ); + + /* Also make the longest match's actions at this point. */ + makeActions( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->curNameInst = prevNameInst; +} + +void LongestMatch::resolveNameRefs( ParseData *pd ) +{ + /* The longest match gets its own name scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Take an action reference for each longest match item and recurse. */ + for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { + /* Record the reference if the item has an action. */ + if ( lmi->action != 0 ) + lmi->action->actionRefs.append( pd->localNameScope ); + + /* Recurse down the join. */ + lmi->join->resolveNameRefs( pd ); + } + + /* The name scope ends, pop the name instantiation. */ + pd->popNameScope( nameFrame ); +} + +void LongestMatch::restart( FsmAp *graph, TransAp *trans ) +{ + StateAp *fromState = trans->fromState; + graph->detachTrans( fromState, trans->toState, trans ); + graph->attachTrans( fromState, graph->startState, trans ); +} + +void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph ) +{ + graph->markReachableFromHereStopFinal( graph->startState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & SB_ISMARKED ) { + ms->lmItemSet.insert( 0 ); + ms->stateBits &= ~ SB_ISMARKED; + } + } + + /* Transfer the first item of non-empty lmAction tables to the item sets + * of the states that follow. Exclude states that have no transitions out. + * This must happen on a separate pass so that on each iteration of the + * next pass we have the item set entries from all lmAction tables. 
*/ + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + if ( trans->lmActionTable.length() > 0 ) { + LmActionTableEl *lmAct = trans->lmActionTable.data; + StateAp *toState = trans->toState; + assert( toState ); + + /* Check if there are transitions out, this may be a very + * close approximation? Out transitions going nowhere? + * FIXME: Check. */ + if ( toState->outList.length() > 0 ) { + /* Fill the item sets. */ + graph->markReachableFromHereStopFinal( toState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & SB_ISMARKED ) { + ms->lmItemSet.insert( lmAct->value ); + ms->stateBits &= ~ SB_ISMARKED; + } + } + } + } + } + } + + /* The lmItem sets are now filled, telling us which longest match rules + * can succeed in which states. First determine if we need to make sure + * act is defaulted to zero. We need to do this if there are any states + * with lmItemSet.length() > 1 and NULL is included. That is, that the + * switch may get called when in fact nothing has been matched. */ + int maxItemSetLength = 0; + graph->markReachableFromHereStopFinal( graph->startState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & SB_ISMARKED ) { + if ( ms->lmItemSet.length() > maxItemSetLength ) + maxItemSetLength = ms->lmItemSet.length(); + ms->stateBits &= ~ SB_ISMARKED; + } + } + + /* The actions executed on starting to match a token. */ + graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart ); + graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart ); + if ( maxItemSetLength > 1 ) { + /* The longest match action switch may be called when tokens are + * matched, in which case act must be initialized, there must be a + * case to handle the error, and the generated machine will require an + * error state. 
*/ + lmSwitchHandlesError = true; + pd->lmRequiresErrorState = true; + graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId ); + } + + /* The place to store transitions to restart. It maybe possible for the + * restarting to affect the searching through the graph that follows. For + * now take the safe route and save the list of transitions to restart + * until after all searching is done. */ + Vector<TransAp*> restartTrans; + + /* Set actions that do immediate token recognition, set the longest match part + * id and set the token ending. */ + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + if ( trans->lmActionTable.length() > 0 ) { + LmActionTableEl *lmAct = trans->lmActionTable.data; + StateAp *toState = trans->toState; + assert( toState ); + + /* Check if there are transitions out, this may be a very + * close approximation? Out transitions going nowhere? + * FIXME: Check. */ + if ( toState->outList.length() == 0 ) { + /* Can execute the immediate action for the longest match + * part. Redirect the action to the start state. */ + trans->actionTable.setAction( lmAct->key, + lmAct->value->actOnLast ); + restartTrans.append( trans ); + } + else { + /* Look for non final states that have a non-empty item + * set. If these are present then we need to record the + * end of the token. Also Find the highest item set + * length reachable from here (excluding at transtions to + * final states). 
*/ + bool nonFinalNonEmptyItemSet = false; + maxItemSetLength = 0; + graph->markReachableFromHereStopFinal( toState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & SB_ISMARKED ) { + if ( ms->lmItemSet.length() > 0 && !ms->isFinState() ) + nonFinalNonEmptyItemSet = true; + if ( ms->lmItemSet.length() > maxItemSetLength ) + maxItemSetLength = ms->lmItemSet.length(); + ms->stateBits &= ~ SB_ISMARKED; + } + } + + /* If there are reachable states that are not final and + * have non empty item sets or that have an item set + * length greater than one then we need to set tokend + * because the error action that matches the token will + * require it. */ + if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 ) + trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd ); + + /* Some states may not know which longest match item to + * execute, must set it. */ + if ( maxItemSetLength > 1 ) { + /* There are transitions out, another match may come. */ + trans->actionTable.setAction( lmAct->key, + lmAct->value->setActId ); + } + } + } + } + } + + /* Now that all graph searching is done it certainly safe set the + * restarting. It may be safe above, however this must be verified. */ + for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ ) + restart( graph, *pt ); + + int lmErrActionOrd = pd->curActionOrd++; + + /* Embed the error for recognizing a char. */ + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) { + if ( st->isFinState() ) { + /* On error execute the onActNext action, which knows that + * the last character of the token was one back and restart. 
*/ + graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, + &st->lmItemSet[0]->actOnNext, 1 ); + } + else { + graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, + &st->lmItemSet[0]->actLagBehind, 1 ); + } + } + else if ( st->lmItemSet.length() > 1 ) { + /* Need to use the select. Take note of the which items the select + * is needed for so only the necessary actions are included. */ + for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) { + if ( *plmi != 0 ) + (*plmi)->inLmSelect = true; + } + /* On error, execute the action select and go to the start state. */ + graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, + &lmActSelect, 1 ); + } + } + + /* Finally, the start state should be made final. */ + graph->setFinState( graph->startState ); +} + +FsmAp *LongestMatch::walk( ParseData *pd ) +{ + /* The longest match has it's own name scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Make each part of the longest match. */ + FsmAp **parts = new FsmAp*[longestMatchList->length()]; + LmPartList::Iter lmi = *longestMatchList; + for ( int i = 0; lmi.lte(); lmi++, i++ ) { + /* Create the machine and embed the setting of the longest match id. */ + parts[i] = lmi->join->walk( pd ); + parts[i]->longMatchAction( pd->curActionOrd++, lmi ); + } + + /* Union machines one and up with machine zero. The grammar dictates that + * there will always be at least one part. */ + FsmAp *rtnVal = parts[0]; + for ( int i = 1; i < longestMatchList->length(); i++ ) { + rtnVal->unionOp( parts[i] ); + afterOpMinimize( rtnVal ); + } + + runLonestMatch( pd, rtnVal ); + + /* Pop the name scope. 
*/ + pd->popNameScope( nameFrame ); + + delete[] parts; + return rtnVal; +} + +FsmAp *JoinOrLm::walk( ParseData *pd ) +{ + FsmAp *rtnVal = 0; + switch ( type ) { + case JoinType: + rtnVal = join->walk( pd ); + break; + case LongestMatchType: + rtnVal = longestMatch->walk( pd ); + break; + } + return rtnVal; +} + +void JoinOrLm::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case JoinType: + join->makeNameTree( pd ); + break; + case LongestMatchType: + longestMatch->makeNameTree( pd ); + break; + } +} + +void JoinOrLm::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case JoinType: + join->resolveNameRefs( pd ); + break; + case LongestMatchType: + longestMatch->resolveNameRefs( pd ); + break; + } +} + + +/* Construct with a location and the first expression. */ +Join::Join( const InputLoc &loc, Expression *expr ) +: + loc(loc) +{ + exprList.append( expr ); +} + +/* Construct with a location and the first expression. */ +Join::Join( Expression *expr ) +: + loc(loc) +{ + exprList.append( expr ); +} + +/* Walk an expression node. */ +FsmAp *Join::walk( ParseData *pd ) +{ + if ( exprList.length() > 1 ) + return walkJoin( pd ); + else + return exprList.head->walk( pd ); +} + +/* There is a list of expressions to join. */ +FsmAp *Join::walkJoin( ParseData *pd ) +{ + /* We enter into a new name scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Evaluate the machines. */ + FsmAp **fsms = new FsmAp*[exprList.length()]; + ExprList::Iter expr = exprList; + for ( int e = 0; e < exprList.length(); e++, expr++ ) + fsms[e] = expr->walk( pd ); + + /* Get the start and final names. Final is + * guaranteed to exist, start is not. */ + NameInst *startName = pd->curNameInst->start; + NameInst *finalName = pd->curNameInst->final; + + int startId = -1; + if ( startName != 0 ) { + /* Take note that there was an implicit link to the start machine. 
*/ + pd->localNameScope->referencedNames.append( startName ); + startId = startName->id; + } + + /* A final id of -1 indicates there is no epsilon that references the + * final state, therefor do not create one or set an entry point to it. */ + int finalId = -1; + if ( finalName->numRefs > 0 ) + finalId = finalName->id; + + /* Join machines 1 and up onto machine 0. */ + FsmAp *retFsm = fsms[0]; + retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 ); + + /* We can now unset entry points that are not longer used. */ + pd->unsetObsoleteEntries( retFsm ); + + /* Pop the name scope. */ + pd->popNameScope( nameFrame ); + + delete[] fsms; + return retFsm; +} + +void Join::makeNameTree( ParseData *pd ) +{ + if ( exprList.length() > 1 ) { + /* Create the new anonymous scope. */ + NameInst *prevNameInst = pd->curNameInst; + pd->curNameInst = pd->addNameInst( loc, 0, false ); + + /* Join scopes need an implicit "final" target. */ + pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final", + pd->nextNameId++, false ); + + /* Recurse into all expressions in the list. */ + for ( ExprList::Iter expr = exprList; expr.lte(); expr++ ) + expr->makeNameTree( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->curNameInst = prevNameInst; + } + else { + /* Recurse into the single expression. */ + exprList.head->makeNameTree( pd ); + } +} + + +void Join::resolveNameRefs( ParseData *pd ) +{ + /* Branch on whether or not there is to be a join. */ + if ( exprList.length() > 1 ) { + /* The variable definition enters a new scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* The join scope must contain a start label. */ + NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true ); + if ( resolved.length() > 0 ) { + /* Take the first. */ + pd->curNameInst->start = resolved[0]; + if ( resolved.length() > 1 ) { + /* Complain about the multiple references. 
*/ + error(loc) << "multiple start labels" << endl; + errorStateLabels( resolved ); + } + } + + /* Make sure there is a start label. */ + if ( pd->curNameInst->start != 0 ) { + /* There is an implicit reference to start name. */ + pd->curNameInst->start->numRefs += 1; + } + else { + /* No start label. Complain and recover by adding a label to the + * adding one. Recover ignoring the problem. */ + error(loc) << "no start label" << endl; + } + + /* Recurse into all expressions in the list. */ + for ( ExprList::Iter expr = exprList; expr.lte(); expr++ ) + expr->resolveNameRefs( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->popNameScope( nameFrame ); + } + else { + /* Recurse into the single expression. */ + exprList.head->resolveNameRefs( pd ); + } +} + +/* Clean up after an expression node. */ +Expression::~Expression() +{ + switch ( type ) { + case OrType: case IntersectType: case SubtractType: + case StrongSubtractType: + delete expression; + delete term; + break; + case TermType: + delete term; + break; + case BuiltinType: + break; + } +} + +/* Evaluate a single expression node. */ +FsmAp *Expression::walk( ParseData *pd, bool lastInSeq ) +{ + FsmAp *rtnVal = 0; + switch ( type ) { + case OrType: { + /* Evaluate the expression. */ + rtnVal = expression->walk( pd, false ); + /* Evaluate the term. */ + FsmAp *rhs = term->walk( pd ); + /* Perform union. */ + rtnVal->unionOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case IntersectType: { + /* Evaluate the expression. */ + rtnVal = expression->walk( pd ); + /* Evaluate the term. */ + FsmAp *rhs = term->walk( pd ); + /* Perform intersection. */ + rtnVal->intersectOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case SubtractType: { + /* Evaluate the expression. */ + rtnVal = expression->walk( pd ); + /* Evaluate the term. */ + FsmAp *rhs = term->walk( pd ); + /* Perform subtraction. 
*/ + rtnVal->subtractOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case StrongSubtractType: { + /* Evaluate the expression. */ + rtnVal = expression->walk( pd ); + + /* Evaluate the term and pad it with any* machines. */ + FsmAp *rhs = dotStarFsm( pd ); + FsmAp *termFsm = term->walk( pd ); + FsmAp *trailAnyStar = dotStarFsm( pd ); + rhs->concatOp( termFsm ); + rhs->concatOp( trailAnyStar ); + + /* Perform subtraction. */ + rtnVal->subtractOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case TermType: { + /* Return result of the term. */ + rtnVal = term->walk( pd ); + break; + } + case BuiltinType: { + /* Duplicate the builtin. */ + rtnVal = makeBuiltin( builtin, pd ); + break; + } + } + + return rtnVal; +} + +void Expression::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case OrType: + case IntersectType: + case SubtractType: + case StrongSubtractType: + expression->makeNameTree( pd ); + term->makeNameTree( pd ); + break; + case TermType: + term->makeNameTree( pd ); + break; + case BuiltinType: + break; + } +} + +void Expression::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case OrType: + case IntersectType: + case SubtractType: + case StrongSubtractType: + expression->resolveNameRefs( pd ); + term->resolveNameRefs( pd ); + break; + case TermType: + term->resolveNameRefs( pd ); + break; + case BuiltinType: + break; + } +} + +/* Clean up after a term node. */ +Term::~Term() +{ + switch ( type ) { + case ConcatType: + case RightStartType: + case RightFinishType: + case LeftType: + delete term; + delete factorWithAug; + break; + case FactorWithAugType: + delete factorWithAug; + break; + } +} + +/* Evaluate a term node. */ +FsmAp *Term::walk( ParseData *pd, bool lastInSeq ) +{ + FsmAp *rtnVal = 0; + switch ( type ) { + case ConcatType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd, false ); + /* Evaluate the FactorWithRep. */ + FsmAp *rhs = factorWithAug->walk( pd ); + /* Perform concatenation. 
*/ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case RightStartType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd ); + + /* Evaluate the FactorWithRep. */ + FsmAp *rhs = factorWithAug->walk( pd ); + + /* Set up the priority descriptors. The left machine gets the + * lower priority where as the right get the higher start priority. */ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 0; + rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* The start transitions right machine get the higher priority. + * Use the same unique key. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 1; + rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Perform concatenation. */ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case RightFinishType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd ); + + /* Evaluate the FactorWithRep. */ + FsmAp *rhs = factorWithAug->walk( pd ); + + /* Set up the priority descriptors. The left machine gets the + * lower priority where as the finishing transitions to the right + * get the higher priority. */ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 0; + rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* The finishing transitions of the right machine get the higher + * priority. Use the same unique key. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 1; + rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Perform concatenation. */ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case LeftType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd ); + + /* Evaluate the FactorWithRep. */ + FsmAp *rhs = factorWithAug->walk( pd ); + + /* Set up the priority descriptors. The left machine gets the + * higher priority. 
*/ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 1; + rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* The right machine gets the lower priority. Since + * startTransPrior might unnecessarily increase the number of + * states during the state machine construction process (due to + * isolation), we use allTransPrior instead, which has the same + * effect. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 0; + rhs->allTransPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Perform concatenation. */ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case FactorWithAugType: { + rtnVal = factorWithAug->walk( pd ); + break; + } + } + return rtnVal; +} + +void Term::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case ConcatType: + case RightStartType: + case RightFinishType: + case LeftType: + term->makeNameTree( pd ); + factorWithAug->makeNameTree( pd ); + break; + case FactorWithAugType: + factorWithAug->makeNameTree( pd ); + break; + } +} + +void Term::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case ConcatType: + case RightStartType: + case RightFinishType: + case LeftType: + term->resolveNameRefs( pd ); + factorWithAug->resolveNameRefs( pd ); + break; + case FactorWithAugType: + factorWithAug->resolveNameRefs( pd ); + break; + } +} + +/* Clean up after a factor with augmentation node. */ +FactorWithAug::~FactorWithAug() +{ + delete factorWithRep; + + /* Walk the vector of parser actions, deleting function names. */ + + /* Clean up priority descriptors. */ + if ( priorDescs != 0 ) + delete[] priorDescs; +} + +void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd ) +{ + /* Assign actions. */ + for ( int i = 0; i < actions.length(); i++ ) { + switch ( actions[i].type ) { + /* Transition actions. 
*/ + case at_start: + graph->startFsmAction( actionOrd[i], actions[i].action ); + afterOpMinimize( graph ); + break; + case at_all: + graph->allTransAction( actionOrd[i], actions[i].action ); + break; + case at_finish: + graph->finishFsmAction( actionOrd[i], actions[i].action ); + break; + case at_leave: + graph->leaveFsmAction( actionOrd[i], actions[i].action ); + break; + + /* Global error actions. */ + case at_start_gbl_error: + graph->startErrorAction( actionOrd[i], actions[i].action, 0 ); + afterOpMinimize( graph ); + break; + case at_all_gbl_error: + graph->allErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_final_gbl_error: + graph->finalErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_not_start_gbl_error: + graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_not_final_gbl_error: + graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_middle_gbl_error: + graph->middleErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + + /* Local error actions. */ + case at_start_local_error: + graph->startErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + afterOpMinimize( graph ); + break; + case at_all_local_error: + graph->allErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_final_local_error: + graph->finalErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_not_start_local_error: + graph->notStartErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_not_final_local_error: + graph->notFinalErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_middle_local_error: + graph->middleErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + + /* EOF actions. 
*/ + case at_start_eof: + graph->startEOFAction( actionOrd[i], actions[i].action ); + afterOpMinimize( graph ); + break; + case at_all_eof: + graph->allEOFAction( actionOrd[i], actions[i].action ); + break; + case at_final_eof: + graph->finalEOFAction( actionOrd[i], actions[i].action ); + break; + case at_not_start_eof: + graph->notStartEOFAction( actionOrd[i], actions[i].action ); + break; + case at_not_final_eof: + graph->notFinalEOFAction( actionOrd[i], actions[i].action ); + break; + case at_middle_eof: + graph->middleEOFAction( actionOrd[i], actions[i].action ); + break; + + /* To State Actions. */ + case at_start_to_state: + graph->startToStateAction( actionOrd[i], actions[i].action ); + afterOpMinimize( graph ); + break; + case at_all_to_state: + graph->allToStateAction( actionOrd[i], actions[i].action ); + break; + case at_final_to_state: + graph->finalToStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_start_to_state: + graph->notStartToStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_final_to_state: + graph->notFinalToStateAction( actionOrd[i], actions[i].action ); + break; + case at_middle_to_state: + graph->middleToStateAction( actionOrd[i], actions[i].action ); + break; + + /* From State Actions. */ + case at_start_from_state: + graph->startFromStateAction( actionOrd[i], actions[i].action ); + afterOpMinimize( graph ); + break; + case at_all_from_state: + graph->allFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_final_from_state: + graph->finalFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_start_from_state: + graph->notStartFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_final_from_state: + graph->notFinalFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_middle_from_state: + graph->middleFromStateAction( actionOrd[i], actions[i].action ); + break; + + /* Remaining cases, prevented by the parser. 
*/ + default: + assert( false ); + break; + } + } +} + +void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd ) +{ + /* Assign priorities. */ + for ( int i = 0; i < priorityAugs.length(); i++ ) { + switch ( priorityAugs[i].type ) { + case at_start: + graph->startFsmPrior( priorOrd[i], &priorDescs[i]); + /* Start fsm priorities are a special case that may require + * minimization afterwards. */ + afterOpMinimize( graph ); + break; + case at_all: + graph->allTransPrior( priorOrd[i], &priorDescs[i] ); + break; + case at_finish: + graph->finishFsmPrior( priorOrd[i], &priorDescs[i] ); + break; + case at_leave: + graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] ); + break; + + default: + /* Parser Prevents this case. */ + break; + } + } +} + +void FactorWithAug::assignConditions( FsmAp *graph ) +{ + for ( int i = 0; i < conditions.length(); i++ ) { + switch ( conditions[i].type ) { + /* Transition actions. */ + case at_start: + graph->startFsmCondition( conditions[i].action ); + afterOpMinimize( graph ); + break; + case at_all: + graph->allTransCondition( conditions[i].action ); + break; + case at_leave: + graph->leaveFsmCondition( conditions[i].action ); + break; + default: + break; + } + } +} + + +/* Evaluate a factor with augmentation node. */ +FsmAp *FactorWithAug::walk( ParseData *pd ) +{ + /* Enter into the scopes created for the labels. */ + NameFrame nameFrame = pd->enterNameScope( false, labels.length() ); + + /* Make the array of function orderings. */ + int *actionOrd = 0; + if ( actions.length() > 0 ) + actionOrd = new int[actions.length()]; + + /* First walk the list of actions, assigning order to all starting + * actions. 
*/ + for ( int i = 0; i < actions.length(); i++ ) { + if ( actions[i].type == at_start || + actions[i].type == at_start_gbl_error || + actions[i].type == at_start_local_error || + actions[i].type == at_start_to_state || + actions[i].type == at_start_from_state || + actions[i].type == at_start_eof ) + actionOrd[i] = pd->curActionOrd++; + } + + /* Evaluate the factor with repetition. */ + FsmAp *rtnVal = factorWithRep->walk( pd ); + + /* Compute the remaining action orderings. */ + for ( int i = 0; i < actions.length(); i++ ) { + if ( actions[i].type != at_start && + actions[i].type != at_start_gbl_error && + actions[i].type != at_start_local_error && + actions[i].type != at_start_to_state && + actions[i].type != at_start_from_state && + actions[i].type != at_start_eof ) + actionOrd[i] = pd->curActionOrd++; + } + + assignConditions( rtnVal ); + + assignActions( pd, rtnVal , actionOrd ); + + /* Make the array of priority orderings. Orderings are local to this walk + * of the factor with augmentation. */ + int *priorOrd = 0; + if ( priorityAugs.length() > 0 ) + priorOrd = new int[priorityAugs.length()]; + + /* Walk all priorities, assigning the priority ordering. */ + for ( int i = 0; i < priorityAugs.length(); i++ ) + priorOrd[i] = pd->curPriorOrd++; + + /* If the priority descriptors have not been made, make them now. Make + * priority descriptors for each priority asignment that will be passed to + * the fsm. Used to keep track of the key, value and used bit. */ + if ( priorDescs == 0 && priorityAugs.length() > 0 ) { + priorDescs = new PriorDesc[priorityAugs.length()]; + for ( int i = 0; i < priorityAugs.length(); i++ ) { + /* Init the prior descriptor for the priority setting. */ + priorDescs[i].key = priorityAugs[i].priorKey; + priorDescs[i].priority = priorityAugs[i].priorValue; + } + } + + /* Assign priorities into the machine. */ + assignPriorities( rtnVal, priorOrd ); + + /* Assign epsilon transitions. 
*/ + for ( int e = 0; e < epsilonLinks.length(); e++ ) { + /* Get the name, which may not exist. If it doesn't then silently + * ignore it because an error has already been reported. */ + NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++]; + if ( epTarg != 0 ) { + /* Make the epsilon transitions. */ + rtnVal->epsilonTrans( epTarg->id ); + + /* Note that we have made a link to the name. */ + pd->localNameScope->referencedNames.append( epTarg ); + } + } + + /* Set entry points for labels. */ + if ( labels.length() > 0 ) { + /* Pop the names. */ + pd->resetNameScope( nameFrame ); + + /* Make labels that are referenced into entry points. */ + for ( int i = 0; i < labels.length(); i++ ) { + pd->enterNameScope( false, 1 ); + + /* Will always be found. */ + NameInst *name = pd->curNameInst; + + /* If the name is referenced then set the entry point. */ + if ( name->numRefs > 0 ) + rtnVal->setEntry( name->id, rtnVal->startState ); + } + + pd->popNameScope( nameFrame ); + } + + if ( priorOrd != 0 ) + delete[] priorOrd; + if ( actionOrd != 0 ) + delete[] actionOrd; + return rtnVal; +} + +void FactorWithAug::makeNameTree( ParseData *pd ) +{ + /* Add the labels to the tree of instantiated names. Each label + * makes a new scope. */ + NameInst *prevNameInst = pd->curNameInst; + for ( int i = 0; i < labels.length(); i++ ) + pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true ); + + /* Recurse, then pop the names. */ + factorWithRep->makeNameTree( pd ); + pd->curNameInst = prevNameInst; +} + + +void FactorWithAug::resolveNameRefs( ParseData *pd ) +{ + /* Enter into the name scope created by any labels. */ + NameFrame nameFrame = pd->enterNameScope( false, labels.length() ); + + /* Note action references. */ + for ( int i = 0; i < actions.length(); i++ ) + actions[i].action->actionRefs.append( pd->localNameScope ); + + /* Recurse first. IMPORTANT: we must do the exact same traversal as when + * the tree is constructed. 
*/ + factorWithRep->resolveNameRefs( pd ); + + /* Resolve epsilon transitions. */ + for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) { + /* Get the link. */ + EpsilonLink &link = epsilonLinks[ep]; + NameInst *resolvedName = 0; + + if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) { + /* Epsilon drawn to an implicit final state. An implicit final is + * only available in join operations. */ + resolvedName = pd->localNameScope->final; + } + else { + /* Do an search for the name. */ + NameSet resolved; + pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 ); + if ( resolved.length() > 0 ) { + /* Take the first one. */ + resolvedName = resolved[0]; + if ( resolved.length() > 1 ) { + /* Complain about the multiple references. */ + error(link.loc) << "state reference " << link.target << + " resolves to multiple entry points" << endl; + errorStateLabels( resolved ); + } + } + } + + /* This is tricky, we stuff resolved epsilon transitions into one long + * vector in the parse data structure. Since the name resolution and + * graph generation both do identical walks of the parse tree we + * should always find the link resolutions in the right place. */ + pd->epsilonResolvedLinks.append( resolvedName ); + + if ( resolvedName != 0 ) { + /* Found the name, bump of the reference count on it. */ + resolvedName->numRefs += 1; + } + else { + /* Complain, no recovery action, the epsilon op will ignore any + * epsilon transitions whose names did not resolve. */ + error(link.loc) << "could not resolve label " << link.target << endl; + } + } + + if ( labels.length() > 0 ) + pd->popNameScope( nameFrame ); +} + + +/* Clean up after a factor with repetition node. 
*/ +FactorWithRep::~FactorWithRep() +{ + switch ( type ) { + case StarType: case StarStarType: case OptionalType: case PlusType: + case ExactType: case MaxType: case MinType: case RangeType: + delete factorWithRep; + break; + case FactorWithNegType: + delete factorWithNeg; + break; + } +} + +/* Evaluate a factor with repetition node. */ +FsmAp *FactorWithRep::walk( ParseData *pd ) +{ + FsmAp *retFsm = 0; + + switch ( type ) { + case StarType: { + /* Evaluate the FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying kleene star to a machine that " + "accepts zero length word" << endl; + } + + /* Shift over the start action orders then do the kleene star. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + retFsm->starOp( ); + afterOpMinimize( retFsm ); + break; + } + case StarStarType: { + /* Evaluate the FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying kleene star to a machine that " + "accepts zero length word" << endl; + } + + /* Set up the prior descs. All gets priority one, whereas leaving gets + * priority zero. Make a unique key so that these priorities don't + * interfere with any priorities set by the user. */ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 1; + retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* Leaveing gets priority 0. Use same unique key. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 0; + retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Shift over the start action orders then do the kleene star. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + retFsm->starOp( ); + afterOpMinimize( retFsm ); + break; + } + case OptionalType: { + /* Make the null fsm. */ + FsmAp *nu = new FsmAp(); + nu->lambdaFsm( ); + + /* Evaluate the FactorWithRep. 
*/ + retFsm = factorWithRep->walk( pd ); + + /* Perform the question operator. */ + retFsm->unionOp( nu ); + afterOpMinimize( retFsm ); + break; + } + case PlusType: { + /* Evaluate the FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying plus operator to a machine that " + "accpets zero length word" << endl; + } + + /* Need a duplicated for the star end. */ + FsmAp *dup = new FsmAp( *retFsm ); + + /* The start func orders need to be shifted before doing the star. */ + pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd ); + + /* Star the duplicate. */ + dup->starOp( ); + afterOpMinimize( dup ); + + retFsm->concatOp( dup ); + afterOpMinimize( retFsm ); + break; + } + case ExactType: { + /* Get an int from the repetition amount. */ + if ( lowerRep == 0 ) { + /* No copies. Don't need to evaluate the factorWithRep. + * This Defeats the purpose so give a warning. */ + warning(loc) << "exactly zero repetitions results " + "in the null machine" << endl; + + retFsm = new FsmAp(); + retFsm->lambdaFsm(); + } + else { + /* Evaluate the first FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing the + * repetition. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + /* Do the repetition on the machine. Already guarded against n == 0 */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + } + break; + } + case MaxType: { + /* Get an int from the repetition amount. */ + if ( upperRep == 0 ) { + /* No copies. Don't need to evaluate the factorWithRep. + * This Defeats the purpose so give a warning. 
*/ + warning(loc) << "max zero repetitions results " + "in the null machine" << endl; + + retFsm = new FsmAp(); + retFsm->lambdaFsm(); + } + else { + /* Evaluate the first FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying max repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing the + * repetition. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + /* Do the repetition on the machine. Already guarded against n == 0 */ + retFsm->optionalRepeatOp( upperRep ); + afterOpMinimize( retFsm ); + } + break; + } + case MinType: { + /* Evaluate the repeated machine. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying min repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing the repetition + * and the kleene star. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + if ( lowerRep == 0 ) { + /* Acts just like a star op on the machine to return. */ + retFsm->starOp( ); + afterOpMinimize( retFsm ); + } + else { + /* Take a duplicate for the plus. */ + FsmAp *dup = new FsmAp( *retFsm ); + + /* Do repetition on the first half. */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + + /* Star the duplicate. */ + dup->starOp( ); + afterOpMinimize( dup ); + + /* Tak on the kleene star. */ + retFsm->concatOp( dup ); + afterOpMinimize( retFsm ); + } + break; + } + case RangeType: { + /* Check for bogus range. */ + if ( upperRep - lowerRep < 0 ) { + error(loc) << "invalid range repetition" << endl; + + /* Return null machine as recovery. */ + retFsm = new FsmAp(); + retFsm->lambdaFsm(); + } + else if ( lowerRep == 0 && upperRep == 0 ) { + /* No copies. Don't need to evaluate the factorWithRep. 
This + * defeats the purpose so give a warning. */ + warning(loc) << "zero to zero repetitions results " + "in the null machine" << endl; + + retFsm = new FsmAp(); + retFsm->lambdaFsm(); + } + else { + /* Now need to evaluate the repeated machine. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying range repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing both kinds + * of repetition. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + if ( lowerRep == 0 ) { + /* Just doing max repetition. Already guarded against n == 0. */ + retFsm->optionalRepeatOp( upperRep ); + afterOpMinimize( retFsm ); + } + else if ( lowerRep == upperRep ) { + /* Just doing exact repetition. Already guarded against n == 0. */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + } + else { + /* This is the case that 0 < lowerRep < upperRep. Take a + * duplicate for the optional repeat. */ + FsmAp *dup = new FsmAp( *retFsm ); + + /* Do repetition on the first half. */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + + /* Do optional repetition on the second half. */ + dup->optionalRepeatOp( upperRep - lowerRep ); + afterOpMinimize( dup ); + + /* Tak on the duplicate machine. */ + retFsm->concatOp( dup ); + afterOpMinimize( retFsm ); + } + } + break; + } + case FactorWithNegType: { + /* Evaluate the Factor. Pass it up. 
*/ + retFsm = factorWithNeg->walk( pd ); + break; + }} + return retFsm; +} + +void FactorWithRep::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case StarType: + case StarStarType: + case OptionalType: + case PlusType: + case ExactType: + case MaxType: + case MinType: + case RangeType: + factorWithRep->makeNameTree( pd ); + break; + case FactorWithNegType: + factorWithNeg->makeNameTree( pd ); + break; + } +} + +void FactorWithRep::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case StarType: + case StarStarType: + case OptionalType: + case PlusType: + case ExactType: + case MaxType: + case MinType: + case RangeType: + factorWithRep->resolveNameRefs( pd ); + break; + case FactorWithNegType: + factorWithNeg->resolveNameRefs( pd ); + break; + } +} + +/* Clean up after a factor with negation node. */ +FactorWithNeg::~FactorWithNeg() +{ + switch ( type ) { + case NegateType: + case CharNegateType: + delete factorWithNeg; + break; + case FactorType: + delete factor; + break; + } +} + +/* Evaluate a factor with negation node. */ +FsmAp *FactorWithNeg::walk( ParseData *pd ) +{ + FsmAp *retFsm = 0; + + switch ( type ) { + case NegateType: { + /* Evaluate the factorWithNeg. */ + FsmAp *toNegate = factorWithNeg->walk( pd ); + + /* Negation is subtract from dot-star. */ + retFsm = dotStarFsm( pd ); + retFsm->subtractOp( toNegate ); + afterOpMinimize( retFsm ); + break; + } + case CharNegateType: { + /* Evaluate the factorWithNeg. */ + FsmAp *toNegate = factorWithNeg->walk( pd ); + + /* CharNegation is subtract from dot. */ + retFsm = dotFsm( pd ); + retFsm->subtractOp( toNegate ); + afterOpMinimize( retFsm ); + break; + } + case FactorType: { + /* Evaluate the Factor. Pass it up. 
*/ + retFsm = factor->walk( pd ); + break; + }} + return retFsm; +} + +void FactorWithNeg::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case NegateType: + case CharNegateType: + factorWithNeg->makeNameTree( pd ); + break; + case FactorType: + factor->makeNameTree( pd ); + break; + } +} + +void FactorWithNeg::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case NegateType: + case CharNegateType: + factorWithNeg->resolveNameRefs( pd ); + break; + case FactorType: + factor->resolveNameRefs( pd ); + break; + } +} + +/* Clean up after a factor node. */ +Factor::~Factor() +{ + switch ( type ) { + case LiteralType: + delete literal; + break; + case RangeType: + delete range; + break; + case OrExprType: + delete reItem; + break; + case RegExprType: + delete regExp; + break; + case ReferenceType: + break; + case ParenType: + delete join; + break; + case LongestMatchType: + delete longestMatch; + break; + } +} + +/* Evaluate a factor node. */ +FsmAp *Factor::walk( ParseData *pd ) +{ + FsmAp *rtnVal = 0; + switch ( type ) { + case LiteralType: + rtnVal = literal->walk( pd ); + break; + case RangeType: + rtnVal = range->walk( pd ); + break; + case OrExprType: + rtnVal = reItem->walk( pd, 0 ); + break; + case RegExprType: + rtnVal = regExp->walk( pd, 0 ); + break; + case ReferenceType: + rtnVal = varDef->walk( pd ); + break; + case ParenType: + rtnVal = join->walk( pd ); + break; + case LongestMatchType: + rtnVal = longestMatch->walk( pd ); + break; + } + + return rtnVal; +} + +void Factor::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case LiteralType: + case RangeType: + case OrExprType: + case RegExprType: + break; + case ReferenceType: + varDef->makeNameTree( loc, pd ); + break; + case ParenType: + join->makeNameTree( pd ); + break; + case LongestMatchType: + longestMatch->makeNameTree( pd ); + break; + } +} + +void Factor::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case LiteralType: + case RangeType: + case OrExprType: + case 
RegExprType: + break; + case ReferenceType: + varDef->resolveNameRefs( pd ); + break; + case ParenType: + join->resolveNameRefs( pd ); + break; + case LongestMatchType: + longestMatch->resolveNameRefs( pd ); + break; + } +} + +/* Clean up a range object. Must delete the two literals. */ +Range::~Range() +{ + delete lowerLit; + delete upperLit; +} + +bool Range::verifyRangeFsm( FsmAp *rangeEnd ) +{ + /* Must have two states. */ + if ( rangeEnd->stateList.length() != 2 ) + return false; + /* The start state cannot be final. */ + if ( rangeEnd->startState->isFinState() ) + return false; + /* There should be only one final state. */ + if ( rangeEnd->finStateSet.length() != 1 ) + return false; + /* The final state cannot have any transitions out. */ + if ( rangeEnd->finStateSet[0]->outList.length() != 0 ) + return false; + /* The start state should have only one transition out. */ + if ( rangeEnd->startState->outList.length() != 1 ) + return false; + /* The singe transition out of the start state should not be a range. */ + TransAp *startTrans = rangeEnd->startState->outList.head; + if ( startTrans->lowKey != startTrans->highKey ) + return false; + return true; +} + +/* Evaluate a range. Gets the lower an upper key and makes an fsm range. */ +FsmAp *Range::walk( ParseData *pd ) +{ + /* Construct and verify the suitability of the lower end of the range. */ + FsmAp *lowerFsm = lowerLit->walk( pd ); + if ( !verifyRangeFsm( lowerFsm ) ) { + error(lowerLit->token.loc) << + "bad range lower end, must be a single character" << endl; + } + + /* Construct and verify the upper end. */ + FsmAp *upperFsm = upperLit->walk( pd ); + if ( !verifyRangeFsm( upperFsm ) ) { + error(upperLit->token.loc) << + "bad range upper end, must be a single character" << endl; + } + + /* Grab the keys from the machines, then delete them. 
*/ + Key lowKey = lowerFsm->startState->outList.head->lowKey; + Key highKey = upperFsm->startState->outList.head->lowKey; + delete lowerFsm; + delete upperFsm; + + /* Validate the range. */ + if ( lowKey > highKey ) { + /* Recover by setting upper to lower; */ + error(lowerLit->token.loc) << "lower end of range is greater then upper end" << endl; + highKey = lowKey; + } + + /* Return the range now that it is validated. */ + FsmAp *retFsm = new FsmAp(); + retFsm->rangeFsm( lowKey, highKey ); + return retFsm; +} + +/* Evaluate a literal object. */ +FsmAp *Literal::walk( ParseData *pd ) +{ + /* FsmAp to return, is the alphabet signed. */ + FsmAp *rtnVal = 0; + + switch ( type ) { + case Number: { + /* Make the fsm key in int format. */ + Key fsmKey = makeFsmKeyNum( token.data, token.loc, pd ); + /* Make the new machine. */ + rtnVal = new FsmAp(); + rtnVal->concatFsm( fsmKey ); + break; + } + case LitString: { + /* Make the array of keys in int format. */ + Token interp; + bool caseInsensitive; + token.prepareLitString( interp, caseInsensitive ); + Key *arr = new Key[interp.length]; + makeFsmKeyArray( arr, interp.data, interp.length, pd ); + + /* Make the new machine. */ + rtnVal = new FsmAp(); + if ( caseInsensitive ) + rtnVal->concatFsmCI( arr, interp.length ); + else + rtnVal->concatFsm( arr, interp.length ); + delete[] interp.data; + delete[] arr; + break; + }} + return rtnVal; +} + +/* Clean up after a regular expression object. */ +RegExpr::~RegExpr() +{ + switch ( type ) { + case RecurseItem: + delete regExp; + delete item; + break; + case Empty: + break; + } +} + +/* Evaluate a regular expression object. */ +FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex ) +{ + /* This is the root regex, pass down a pointer to this. */ + if ( rootRegex == 0 ) + rootRegex = this; + + FsmAp *rtnVal = 0; + switch ( type ) { + case RecurseItem: { + /* Walk both items. 
*/ + FsmAp *fsm1 = regExp->walk( pd, rootRegex ); + FsmAp *fsm2 = item->walk( pd, rootRegex ); + if ( fsm1 == 0 ) + rtnVal = fsm2; + else { + fsm1->concatOp( fsm2 ); + rtnVal = fsm1; + } + break; + } + case Empty: { + rtnVal = 0; + break; + } + } + return rtnVal; +} + +/* Clean up after an item in a regular expression. */ +ReItem::~ReItem() +{ + switch ( type ) { + case Data: + case Dot: + break; + case OrBlock: + case NegOrBlock: + delete orBlock; + break; + } +} + +/* Evaluate an item in a regular expression. Builds the machine for the item + * according to its type (data, dot, or block, negated or block) and then + * applies the star operator if the item is starred. Walking an or block may + * give back a null pointer; that case is handled here instead of being + * dereferenced. */ +FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex ) +{ + /* The fsm to return, is the alphabet signed? */ + FsmAp *rtnVal = 0; + + switch ( type ) { + case Data: { + /* Move the data into an integer array and make a concat fsm. */ + Key *arr = new Key[token.length]; + makeFsmKeyArray( arr, token.data, token.length, pd ); + + /* Make the concat fsm. */ + rtnVal = new FsmAp(); + if ( rootRegex != 0 && rootRegex->caseInsensitive ) + rtnVal->concatFsmCI( arr, token.length ); + else + rtnVal->concatFsm( arr, token.length ); + delete[] arr; + break; + } + case Dot: { + /* Make the dot fsm. */ + rtnVal = dotFsm( pd ); + break; + } + case OrBlock: { + /* Get the or block and minimize it. If the walk produced no + * machine, substitute the null machine so there is something to + * minimize and return. NOTE(review): confirm that the null machine + * is the intended meaning of an or block with no items. */ + rtnVal = orBlock->walk( pd, rootRegex ); + if ( rtnVal == 0 ) { + rtnVal = new FsmAp(); + rtnVal->lambdaFsm(); + } + rtnVal->minimizePartition2(); + break; + } + case NegOrBlock: { + /* Get the or block. */ + FsmAp *fsm = orBlock->walk( pd, rootRegex ); + if ( fsm == 0 ) { + /* No machine to subtract, so the result is the plain dot fsm. */ + rtnVal = dotFsm( pd ); + } + else { + /* Minimize the or block, then make a dot fsm and subtract the + * or block from it. */ + fsm->minimizePartition2(); + rtnVal = dotFsm( pd ); + rtnVal->subtractOp( fsm ); + rtnVal->minimizePartition2(); + } + break; + } + } + + /* If the item is followed by a star, then apply the star op. */ + if ( star ) { + if ( rtnVal->startState->isFinState() ) { + warning(loc) << "applying kleene star to a machine that " + "accpets zero length word" << endl; + } + + rtnVal->starOp(); + rtnVal->minimizePartition2(); + } + return rtnVal; +} + +/* Clean up after an or block of a regular expression. 
*/ +ReOrBlock::~ReOrBlock() +{ + switch ( type ) { + case RecurseItem: + delete orBlock; + delete item; + break; + case Empty: + break; + } +} + + +/* Evaluate an or block of a regular expression. */ +FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex ) +{ + FsmAp *rtnVal = 0; + switch ( type ) { + case RecurseItem: { + /* Evaluate the two fsm. */ + FsmAp *fsm1 = orBlock->walk( pd, rootRegex ); + FsmAp *fsm2 = item->walk( pd, rootRegex ); + if ( fsm1 == 0 ) + rtnVal = fsm2; + else { + fsm1->unionOp( fsm2 ); + rtnVal = fsm1; + } + break; + } + case Empty: { + rtnVal = 0; + break; + } + } + return rtnVal;; +} + +/* Evaluate an or block item of a regular expression. */ +FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex ) +{ + /* The return value, is the alphabet signed? */ + FsmAp *rtnVal = 0; + switch ( type ) { + case Data: { + /* Make the or machine. */ + rtnVal = new FsmAp(); + + /* Put the or data into an array of ints. Note that we find unique + * keys. Duplicates are silently ignored. The alternative would be to + * issue warning or an error but since we can't with [a0-9a] or 'a' | + * 'a' don't bother here. */ + KeySet keySet; + makeFsmUniqueKeyArray( keySet, token.data, token.length, + rootRegex != 0 ? rootRegex->caseInsensitive : false, pd ); + + /* Run the or operator. */ + rtnVal->orFsm( keySet.data, keySet.length() ); + break; + } + case Range: { + /* Make the upper and lower keys. */ + Key lowKey = makeFsmKeyChar( lower, pd ); + Key highKey = makeFsmKeyChar( upper, pd ); + + /* Validate the range. */ + if ( lowKey > highKey ) { + /* Recover by setting upper to lower; */ + error(loc) << "lower end of range is greater then upper end" << endl; + highKey = lowKey; + } + + /* Make the range machine. */ + rtnVal = new FsmAp(); + rtnVal->rangeFsm( lowKey, highKey ); + + if ( rootRegex != 0 && rootRegex->caseInsensitive ) { + if ( lowKey <= 'Z' && 'A' <= highKey ) { + Key otherLow = lowKey < 'A' ? 
Key('A') : lowKey; + Key otherHigh = 'Z' < highKey ? Key('Z') : highKey; + + otherLow = 'a' + ( otherLow - 'A' ); + otherHigh = 'a' + ( otherHigh - 'A' ); + + FsmAp *otherRange = new FsmAp(); + otherRange->rangeFsm( otherLow, otherHigh ); + rtnVal->unionOp( otherRange ); + rtnVal->minimizePartition2(); + } + + /* Tested independently of the upper case overlap above: a range + * such as [Z-b] overlaps both cases and needs both mirrored + * ranges added. */ + if ( lowKey <= 'z' && 'a' <= highKey ) { + Key otherLow = lowKey < 'a' ? Key('a') : lowKey; + Key otherHigh = 'z' < highKey ? Key('z') : highKey; + + otherLow = 'A' + ( otherLow - 'a' ); + otherHigh = 'A' + ( otherHigh - 'a' ); + + FsmAp *otherRange = new FsmAp(); + otherRange->rangeFsm( otherLow, otherHigh ); + rtnVal->unionOp( otherRange ); + rtnVal->minimizePartition2(); + } + } + + break; + }} + return rtnVal; +} diff --git a/ragel/parsetree.h b/ragel/parsetree.h new file mode 100644 index 0000000..c340171 --- /dev/null +++ b/ragel/parsetree.h @@ -0,0 +1,761 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _PARSETREE_H +#define _PARSETREE_H + +#include "avlmap.h" +#include "bstmap.h" +#include "vector.h" +#include "dlist.h" + +struct NameInst; + +/* Types of builtin machines. 
*/ +enum BuiltinMachine +{ + BT_Any, + BT_Ascii, + BT_Extend, + BT_Alpha, + BT_Digit, + BT_Alnum, + BT_Lower, + BT_Upper, + BT_Cntrl, + BT_Graph, + BT_Print, + BT_Punct, + BT_Space, + BT_Xdigit, + BT_Lambda, + BT_Empty +}; + +/* Location in an input file. */ +struct InputLoc +{ + char *fileName; + int line; + int col; +}; + +struct ParseData; + +/* Leaf type. */ +struct Literal; + +/* Tree nodes. */ + +struct Term; +struct FactorWithAug; +struct FactorWithRep; +struct FactorWithNeg; +struct Factor; +struct Expression; +struct Join; +struct JoinOrLm; +struct LongestMatch; +struct LongestMatchPart; +struct LmPartList; +struct Range; + +/* Type of augmentation. Describes locations in the machine. */ +enum AugType +{ + /* Transition actions/priorities. */ + at_start, + at_all, + at_finish, + at_leave, + + /* Global error actions. */ + at_start_gbl_error, + at_all_gbl_error, + at_final_gbl_error, + at_not_start_gbl_error, + at_not_final_gbl_error, + at_middle_gbl_error, + + /* Local error actions. */ + at_start_local_error, + at_all_local_error, + at_final_local_error, + at_not_start_local_error, + at_not_final_local_error, + at_middle_local_error, + + /* To State Action embedding. */ + at_start_to_state, + at_all_to_state, + at_final_to_state, + at_not_start_to_state, + at_not_final_to_state, + at_middle_to_state, + + /* From State Action embedding. */ + at_start_from_state, + at_all_from_state, + at_final_from_state, + at_not_start_from_state, + at_not_final_from_state, + at_middle_from_state, + + /* EOF Action embedding. */ + at_start_eof, + at_all_eof, + at_final_eof, + at_not_start_eof, + at_not_final_eof, + at_middle_eof +}; + +/* IMPORTANT: These must follow the same order as the state augs in AugType + * since we will be using this to compose AugType. 
*/ +enum StateAugType +{ + sat_start = 0, + sat_all, + sat_final, + sat_not_start, + sat_not_final, + sat_middle +}; + +struct Action; +struct PriorDesc; +struct RegExpr; +struct ReItem; +struct ReOrBlock; +struct ReOrItem; +struct ExplicitMachine; +struct InlineItem; +struct InlineList; + +/* Reference to a named state. */ +typedef Vector<char*> NameRef; +typedef Vector<NameRef*> NameRefList; +typedef Vector<NameInst*> NameTargList; + +/* Structure for storing the location of epsilon transitions. */ +struct EpsilonLink +{ + EpsilonLink( const InputLoc &loc, NameRef &target ) + : loc(loc), target(target) { } + + InputLoc loc; + NameRef target; +}; + +/* A label: its text (data) and an input location (loc). */ +struct Label +{ + Label( const InputLoc &loc, char *data ) + : loc(loc), data(data) { } + + InputLoc loc; + char *data; +}; + +/* Structure representing an action assigned to some FactorWithAug node. The + * factor with aug will keep an array of these. */ +struct ParserAction +{ + ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action ) + : loc(loc), type(type), localErrKey(localErrKey), action(action) { } + + InputLoc loc; + AugType type; + int localErrKey; + Action *action; +}; + +/* A token: character data, its length and an input location. */ +struct Token +{ + char *data; + int length; + InputLoc loc; + + void prepareLitString( Token &result, bool &caseInsensitive ); + void append( const Token &other ); + void set( char *str, int len ); +}; + +/* Store the value and type of a priority augmentation. */ +struct PriorityAug +{ + PriorityAug( AugType type, int priorKey, int priorValue ) : + type(type), priorKey(priorKey), priorValue(priorValue) { } + + AugType type; + int priorKey; + int priorValue; +}; + +/* + * A Variable Definition + */ +struct VarDef +{ + VarDef( char *name, JoinOrLm *joinOrLm ) + : name(name), joinOrLm(joinOrLm) { } + + /* Parse tree traversal. 
*/ + FsmAp *walk( ParseData *pd ); + void makeNameTree( const InputLoc &loc, ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + char *name; + JoinOrLm *joinOrLm; +}; + + +/* + * LongestMatch + * + * Wherever possible the item match will execute on the character. If not + * possible the item match will execute on a lookahead character and either + * hold the current char (if one away) or backup. + * + * How to handle the problem of backing up over a buffer break? + * + * Don't want to use pending out transitions for embedding item match because + * the role of item match action is different: it may sometimes match on the + * final transition, or may match on a lookahead character. + * + * Don't want to invent a new operator just for this. So just trail action + * after machine, this means we can only use literal actions. + * + * The item action may + * + * What states of the machine will be final. The item actions that wrap around + * on the last character will go straight to the start state. + * + * Some transitions will be lookahead transitions, they will hold the current + * character. Crossing them with regular transitions must be restricted + * because it does not make sense. The transition cannot simultaneously hold + * and consume the current character. + */ +struct LongestMatchPart +{ + LongestMatchPart( Join *join, Action *action, + InputLoc &semiLoc, int longestMatchId ) + : + join(join), action(action), semiLoc(semiLoc), + longestMatchId(longestMatchId), inLmSelect(false) { } + + InputLoc getLoc(); + + Join *join; + Action *action; + InputLoc semiLoc; + + Action *setActId; + Action *actOnLast; + Action *actOnNext; + Action *actLagBehind; + int longestMatchId; + bool inLmSelect; + LongestMatch *longestMatch; + + LongestMatchPart *prev, *next; +}; + +/* Declare a new type so that ptreetypes.h need not include dlist.h. 
*/ +struct LmPartList : DList<LongestMatchPart> {}; + +struct LongestMatch +{ + /* Construct with a list of joins */ + LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) : + loc(loc), longestMatchList(longestMatchList), name(0), + lmSwitchHandlesError(false) { } + + /* Tree traversal. */ + FsmAp *walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + void runLonestMatch( ParseData *pd, FsmAp *graph ); + Action *newAction( ParseData *pd, const InputLoc &loc, char *name, + InlineList *inlineList ); + void makeActions( ParseData *pd ); + void findName( ParseData *pd ); + void restart( FsmAp *graph, TransAp *trans ); + + InputLoc loc; + LmPartList *longestMatchList; + char *name; + + Action *lmActSelect; + bool lmSwitchHandlesError; + + LongestMatch *next, *prev; +}; + + +/* List of Expressions. */ +typedef DList<Expression> ExprList; + +struct JoinOrLm +{ + enum Type { + JoinType, + LongestMatchType + }; + + JoinOrLm( Join *join ) : + join(join), type(JoinType) {} + JoinOrLm( LongestMatch *longestMatch ) : + longestMatch(longestMatch), type(LongestMatchType) {} + + FsmAp *walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + Join *join; + LongestMatch *longestMatch; + Type type; +}; + +/* + * Join + */ +struct Join +{ + /* Construct with the first expression. */ + Join( Expression *expr ); + Join( const InputLoc &loc, Expression *expr ); + + /* Tree traversal. */ + FsmAp *walk( ParseData *pd ); + FsmAp *walkJoin( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + /* Data. */ + InputLoc loc; + ExprList exprList; +}; + +/* + * Expression + */ +struct Expression +{ + enum Type { + OrType, + IntersectType, + SubtractType, + StrongSubtractType, + TermType, + BuiltinType + }; + + /* Construct with an expression on the left and a term on the right. 
*/ + /* No builtin machine is supplied to this constructor, so the builtin + * member is given a defined placeholder (BT_Any) rather than being left + * indeterminate. NOTE(review): presumably builtin is only read for + * BuiltinType nodes -- confirm in Expression::walk. */ + Expression( Expression *expression, Term *term, Type type ) : + expression(expression), term(term), + builtin(BT_Any), type(type), prev(this), next(this) { } + + /* Construct with only a term. The builtin member gets the same defined + * placeholder as above. */ + Expression( Term *term ) : + expression(0), term(term), builtin(BT_Any), + type(TermType) , prev(this), next(this) { } + + /* Construct with a builtin type. */ + Expression( BuiltinMachine builtin ) : + expression(0), term(0), builtin(builtin), + type(BuiltinType), prev(this), next(this) { } + + ~Expression(); + + /* Tree traversal. */ + FsmAp *walk( ParseData *pd, bool lastInSeq = true ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + /* Node data. */ + Expression *expression; + Term *term; + BuiltinMachine builtin; + Type type; + + Expression *prev, *next; +}; + +/* + * Term + */ +struct Term +{ + enum Type { + ConcatType, + RightStartType, + RightFinishType, + LeftType, + FactorWithAugType + }; + + Term( Term *term, FactorWithAug *factorWithAug ) : + term(term), factorWithAug(factorWithAug), type(ConcatType) { } + + Term( Term *term, FactorWithAug *factorWithAug, Type type ) : + term(term), factorWithAug(factorWithAug), type(type) { } + + Term( FactorWithAug *factorWithAug ) : + term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { } + + ~Term(); + + FsmAp *walk( ParseData *pd, bool lastInSeq = true ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + Term *term; + FactorWithAug *factorWithAug; + Type type; + + /* Priority descriptor for RightFinish type. */ + PriorDesc priorDescs[2]; +}; + + +/* Third level of precedence. Augmenting nodes with actions and priorities. */ +struct FactorWithAug +{ + FactorWithAug( FactorWithRep *factorWithRep ) : + priorDescs(0), factorWithRep(factorWithRep) { } + ~FactorWithAug(); + + /* Tree traversal. 
*/ + FsmAp *walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd ); + void assignPriorities( FsmAp *graph, int *priorOrd ); + + void assignConditions( FsmAp *graph ); + + /* Actions and priorities assigned to the factor node. */ + Vector<ParserAction> actions; + Vector<PriorityAug> priorityAugs; + PriorDesc *priorDescs; + Vector<Label> labels; + Vector<EpsilonLink> epsilonLinks; + Vector<ParserAction> conditions; + + FactorWithRep *factorWithRep; +}; + +/* Fourth level of precedence. Trailing unary operators. Provide kleen star, + * optional and plus. */ +struct FactorWithRep +{ + enum Type { + StarType, + StarStarType, + OptionalType, + PlusType, + ExactType, + MaxType, + MinType, + RangeType, + FactorWithNegType + }; + + FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep, + int lowerRep, int upperRep, Type type ) : + loc(loc), factorWithRep(factorWithRep), + factorWithNeg(0), lowerRep(lowerRep), + upperRep(upperRep), type(type) { } + + FactorWithRep( const InputLoc &loc, FactorWithNeg *factorWithNeg ) + : loc(loc), factorWithNeg(factorWithNeg), type(FactorWithNegType) { } + + ~FactorWithRep(); + + /* Tree traversal. */ + FsmAp *walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + InputLoc loc; + FactorWithRep *factorWithRep; + FactorWithNeg *factorWithNeg; + int lowerRep, upperRep; + Type type; + + /* Priority descriptor for StarStar type. */ + PriorDesc priorDescs[2]; +}; + +/* Fifth level of precedence. Provides Negation. 
*/ +struct FactorWithNeg +{ + enum Type { + NegateType, + CharNegateType, + FactorType + }; + + FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) : + loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { } + + FactorWithNeg( const InputLoc &loc, Factor *factor ) : + loc(loc), factorWithNeg(0), factor(factor), type(FactorType) { } + + ~FactorWithNeg(); + + /* Tree traversal. */ + FsmAp *walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + InputLoc loc; + FactorWithNeg *factorWithNeg; + Factor *factor; + Type type; +}; + +/* + * Factor + */ +struct Factor +{ + /* Language elements a factor node can be. */ + enum Type { + LiteralType, + RangeType, + OrExprType, + RegExprType, + ReferenceType, + ParenType, + LongestMatchType, + }; + + /* Construct with a literal fsm. */ + Factor( Literal *literal ) : + literal(literal), type(LiteralType) { } + + /* Construct with a range. */ + Factor( Range *range ) : + range(range), type(RangeType) { } + + /* Construct with the or part of a regular expression. */ + Factor( ReItem *reItem ) : + reItem(reItem), type(OrExprType) { } + + /* Construct with a regular expression. */ + Factor( RegExpr *regExp ) : + regExp(regExp), type(RegExprType) { } + + /* Construct with a reference to a var def. */ + Factor( const InputLoc &loc, VarDef *varDef ) : + loc(loc), varDef(varDef), type(ReferenceType) {} + + /* Construct with a parenthesized join. */ + Factor( Join *join ) : + join(join), type(ParenType) {} + + /* Construct with a longest match operator. */ + Factor( LongestMatch *longestMatch ) : + longestMatch(longestMatch), type(LongestMatchType) {} + + /* Cleanup. */ + ~Factor(); + + /* Tree traversal. 
*/ + FsmAp *walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + InputLoc loc; + Literal *literal; + Range *range; + ReItem *reItem; + RegExpr *regExp; + VarDef *varDef; + Join *join; + LongestMatch *longestMatch; + int lower, upper; + Type type; +}; + +/* A range machine. Only ever composed of two literals. */ +struct Range +{ + Range( Literal *lowerLit, Literal *upperLit ) + : lowerLit(lowerLit), upperLit(upperLit) { } + + ~Range(); + FsmAp *walk( ParseData *pd ); + bool verifyRangeFsm( FsmAp *rangeEnd ); + + Literal *lowerLit; + Literal *upperLit; +}; + +/* Some literal machine. Can be a number or literal string. */ +struct Literal +{ + enum LiteralType { Number, LitString }; + + Literal( const Token &token, LiteralType type ) + : token(token), type(type) { } + + FsmAp *walk( ParseData *pd ); + + Token token; + LiteralType type; +}; + +/* Regular expression. */ +struct RegExpr +{ + enum RegExpType { RecurseItem, Empty }; + + /* Constructors. */ + RegExpr() : + type(Empty), caseInsensitive(false) { } + RegExpr(RegExpr *regExp, ReItem *item) : + regExp(regExp), item(item), + type(RecurseItem), caseInsensitive(false) { } + + ~RegExpr(); + FsmAp *walk( ParseData *pd, RegExpr *rootRegex ); + + RegExpr *regExp; + ReItem *item; + RegExpType type; + bool caseInsensitive; +}; + +/* An item in a regular expression. */ +struct ReItem +{ + enum ReItemType { Data, Dot, OrBlock, NegOrBlock }; + + ReItem( const InputLoc &loc, const Token &token ) + : loc(loc), token(token), star(false), type(Data) { } + ReItem( const InputLoc &loc, ReItemType type ) + : loc(loc), star(false), type(type) { } + ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type ) + : loc(loc), orBlock(orBlock), star(false), type(type) { } + + ~ReItem(); + FsmAp *walk( ParseData *pd, RegExpr *rootRegex ); + + InputLoc loc; + Token token; + ReOrBlock *orBlock; + bool star; + ReItemType type; +}; + +/* An or block item. 
*/ +struct ReOrBlock +{ + enum ReOrBlockType { RecurseItem, Empty }; + + /* Constructors. */ + ReOrBlock() + : type(Empty) { } + ReOrBlock(ReOrBlock *orBlock, ReOrItem *item) + : orBlock(orBlock), item(item), type(RecurseItem) { } + + ~ReOrBlock(); + FsmAp *walk( ParseData *pd, RegExpr *rootRegex ); + + ReOrBlock *orBlock; + ReOrItem *item; + ReOrBlockType type; +}; + +/* An item in an or block. */ +struct ReOrItem +{ + enum ReOrItemType { Data, Range }; + + ReOrItem( const InputLoc &loc, const Token &token ) + : loc(loc), token(token), type(Data) {} + ReOrItem( const InputLoc &loc, char lower, char upper ) + : loc(loc), lower(lower), upper(upper), type(Range) { } + + FsmAp *walk( ParseData *pd, RegExpr *rootRegex ); + + InputLoc loc; + Token token; + char lower; + char upper; + ReOrItemType type; +}; + + +/* + * Inline code tree + */ +struct InlineList; +struct InlineItem +{ + enum Type + { + Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret, PChar, + Char, Hold, Curs, Targs, Entry, Exec, LmSwitch, LmSetActId, + LmSetTokEnd, LmOnLast, LmOnNext, LmOnLagBehind, LmInitAct, + LmInitTokStart, LmSetTokStart, Break + }; + + InlineItem( const InputLoc &loc, char *data, Type type ) : + loc(loc), data(data), nameRef(0), children(0), type(type) { } + + InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) : + loc(loc), data(0), nameRef(nameRef), children(0), type(type) { } + + InlineItem( const InputLoc &loc, LongestMatch *longestMatch, + LongestMatchPart *longestMatchPart, Type type ) : loc(loc), data(0), + nameRef(0), children(0), longestMatch(longestMatch), + longestMatchPart(longestMatchPart), type(type) { } + + InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) : + loc(loc), data(0), nameRef(0), nameTarg(nameTarg), children(0), + type(type) { } + + InlineItem( const InputLoc &loc, Type type ) : + loc(loc), data(0), nameRef(0), children(0), type(type) { } + + InputLoc loc; + char *data; + NameRef *nameRef; + NameInst *nameTarg; + InlineList 
*children; + LongestMatch *longestMatch; + LongestMatchPart *longestMatchPart; + Type type; + + InlineItem *prev, *next; +}; + +/* Normally this would be atypedef, but that would entail including DList from + * ptreetypes, which should be just typedef forwards. */ +struct InlineList : public DList<InlineItem> { }; + + + +#endif /* _PARSETREE_H */ diff --git a/ragel/ragel.h b/ragel/ragel.h new file mode 100644 index 0000000..58f8a88 --- /dev/null +++ b/ragel/ragel.h @@ -0,0 +1,80 @@ +/* + * Copyright 2001-2003 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _RAGEL_H +#define _RAGEL_H + +#include <stdio.h> +#include <iostream> +#include <fstream> +#include <string> +#include "config.h" + +#define PROGNAME "ragel" + +/* To what degree are machine minimized. */ +enum MinimizeLevel { + MinimizeApprox, + MinimizeStable, + MinimizePartition1, + MinimizePartition2 +}; + +enum MinimizeOpt { + MinimizeNone, + MinimizeEnd, + MinimizeMostOps, + MinimizeEveryOp +}; + + +/* IO filenames and stream. */ +extern char *outputFileName; +extern std::istream *inStream; +extern std::ostream *outStream; + +/* Options. 
*/ +extern MinimizeLevel minimizeLevel; +extern MinimizeOpt minimizeOpt; +extern char *machineSpec, *machineName; +extern bool printStatistics; + +extern int gblErrorCount; +extern char machineMain[]; + +/* Error reporting. */ +struct InputLoc; +std::ostream &error(); +std::ostream &error( const InputLoc &loc ); +std::ostream &warning( ); +std::ostream &warning( const InputLoc &loc ); + +void scan( char *fileName, std::istream &input ); +void terminateAllParsers( ); +void checkMachines( ); +void writeMachines( std::ostream &out, std::string hostData, char *inputFileName ); +void xmlEscapeHost( std::ostream &out, char *data, int len ); + + +/* Size of the include stack. */ +#define INCLUDE_STACK_SIZE 32 + +#endif /* _RAGEL_H */ diff --git a/ragel/rlparse.kh b/ragel/rlparse.kh new file mode 100644 index 0000000..5d7b404 --- /dev/null +++ b/ragel/rlparse.kh @@ -0,0 +1,122 @@ +/* + * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef RLPARSE_H +#define RLPARSE_H + +#include <iostream> +#include "avltree.h" +#include "parsedata.h" + +extern char *lelNames[]; + +struct LangEl; + +struct Parser +{ + %%{ + parser Parser; + + # These must be declared first and in this order. 
Ragel currently cannot + # import kelbt keywords for use in machines, so in the scanner + # rely on knowing the values that kelbt will assign to these. + token KW_Machine, KW_Include, KW_Write, TK_Word, TK_Literal; + + token TK_Number, TK_Inline, TK_Reference, TK_ColonEquals, TK_EndSection; + + # General tokens. + token TK_UInt, TK_Hex, TK_Word, TK_Literal, TK_BaseClause, + TK_DotDot, TK_ColonGt, TK_ColonGtGt, TK_LtColon, TK_Arrow, + TK_DoubleArrow, TK_StarStar, TK_ColonEquals, TK_NameSep, TK_BarStar, + TK_DashDash; + + # Conditions. + token TK_StartCond, TK_AllCond, TK_LeavingCond; + + token TK_Middle; + + # Global error actions. + token TK_StartGblError, TK_AllGblError, TK_FinalGblError, + TK_NotFinalGblError, TK_NotStartGblError, TK_MiddleGblError; + + # Local error actions. + token TK_StartLocalError, TK_AllLocalError, TK_FinalLocalError, + TK_NotFinalLocalError, TK_NotStartLocalError, TK_MiddleLocalError; + + # EOF Action embedding. + token TK_StartEOF, TK_AllEOF, TK_FinalEOF, TK_NotFinalEOF, TK_NotStartEOF, + TK_MiddleEOF; + + # To State Actions. + token TK_StartToState, TK_AllToState, TK_FinalToState, TK_NotFinalToState, + TK_NotStartToState, TK_MiddleToState; + + # In State Actions. + token TK_StartFromState, TK_AllFromState, TK_FinalFromState, + TK_NotFinalFromState, TK_NotStartFromState, TK_MiddleFromState; + + # Regular expression tokens. */ + token RE_Slash, RE_SqOpen, RE_SqOpenNeg, RE_SqClose, RE_Dot, RE_Star, + RE_Dash, RE_Char; + + # Tokens specific to inline code. + token IL_WhiteSpace, IL_Comment, IL_Literal, IL_Symbol; + + # Keywords. + token KW_Action, KW_AlphType, KW_Range, KW_GetKey, KW_Include, KW_Write, + KW_Machine, KW_When, KW_Eof, KW_Err, KW_Lerr, KW_To, KW_From; + + # Specials in code blocks. + token KW_Break, KW_Exec, KW_Hold, KW_PChar, KW_Char, KW_Goto, KW_Call, + KW_Ret, KW_CurState, KW_TargState, KW_Entry, KW_Next, KW_Exec, + KW_Variable, KW_Access; + + # Special token for terminating semi-terminated code blocks. 
Needed because + # semi is sent as a token in the code block rather than as a generic + # symbol. + token TK_Semi; + + interface; + }%% + + Parser( char *fileName, char *sectionName, InputLoc §ionLoc ) + : sectionName(sectionName) + { + pd = new ParseData( fileName, sectionName, sectionLoc ); + } + + int token( InputLoc &loc, int tokId, char *tokstart, int toklen ); + void tryMachineDef( InputLoc &loc, char *name, + JoinOrLm *joinOrLm, bool isInstance ); + + /* Report an error encountered by the parser. */ + ostream &parser_error( int tokId, Token &token ); + + ParseData *pd; + + /* The name of the root section, this does not change during an include. */ + char *sectionName; + + NameRef nameRef; + NameRefList nameRefList; +}; + +#endif diff --git a/ragel/rlparse.kl b/ragel/rlparse.kl new file mode 100644 index 0000000..b39fa5c --- /dev/null +++ b/ragel/rlparse.kl @@ -0,0 +1,1402 @@ +/* + * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "rlparse.h" +#include "ragel.h" +#include <iostream> + +using std::cout; +using std::cerr; +using std::endl; + +ParserDict parserDict; + +%%{ + +parser Parser; + +include "rlparse.kh"; + +start: statement_list; + +statement_list: statement_list statement; +statement_list: ; + +statement: assignment commit; +statement: instantiation commit; +statement: action_spec commit; +statement: alphtype_spec commit; +statement: range_spec commit; +statement: getkey_spec commit; +statement: access_spec commit; +statement: variable_spec commit; + +# We use end section tokens to draw firm boundaries between sections. +statement: TK_EndSection; + +assignment: + machine_name '=' join ';' final { + /* Main machine must be an instance. */ + bool isInstance = false; + if ( strcmp($1->token.data, machineMain) == 0 ) { + warning($1->token.loc) << + "main machine will be implicitly instantiated" << endl; + isInstance = true; + } + + /* Generic creation of machine for instantiation and assignment. */ + JoinOrLm *joinOrLm = new JoinOrLm( $3->join ); + tryMachineDef( $1->token.loc, $1->token.data, joinOrLm, isInstance ); + }; + +instantiation: + machine_name TK_ColonEquals join_or_lm ';' final { + /* Generic creation of machine for instantiation and assignment. */ + tryMachineDef( $1->token.loc, $1->token.data, $3->joinOrLm, true ); + }; + +type token_type +{ + Token token; +}; + +nonterm machine_name uses token_type; + +machine_name: + TK_Word final { + //cerr << "parser: machine name" << endl; + + /* Make/get the priority key. The name may have already been referenced + * and therefore exist. 
*/ + PriorDictEl *priorDictEl; + if ( pd->priorDict.insert( $1->data, pd->nextPriorKey, &priorDictEl ) ) + pd->nextPriorKey += 1; + pd->curDefPriorKey = priorDictEl->value; + + /* Make/get the local error key. */ + LocalErrDictEl *localErrDictEl; + if ( pd->localErrDict.insert( $1->data, pd->nextLocalErrKey, &localErrDictEl ) ) + pd->nextLocalErrKey += 1; + pd->curDefLocalErrKey = localErrDictEl->value; + + $$->token = *$1; + }; + +action_spec: + KW_Action TK_Word '{' inline_block '}' final { + if ( pd->actionDict.find( $2->data ) ) { + /* Recover by just ignoring the duplicate. */ + error($2->loc) << "action \"" << $2->data << "\" already defined" << endl; + } + else { + //cerr << "NEW ACTION " << $2->data << " " << $4->inlineList << endl; + /* Add the action to the list of actions. */ + Action *newAction = new Action( $3->loc, $2->data, $4->inlineList ); + + /* Insert to list and dict. */ + pd->actionList.append( newAction ); + pd->actionDict.insert( newAction ); + } + }; + +# Specifies the data type of the input alphabet. One or two words followed by a +# semi-colon. +alphtype_spec: + KW_AlphType TK_Word TK_Word ';' final { + if ( ! pd->setAlphType( $2->data, $3->data ) ) { + // Recover by ignoring the alphtype statement. + error($2->loc) << "\"" << $2->data << + " " << $3->data << "\" is not a valid alphabet type" << endl; + } + }; + +alphtype_spec: + KW_AlphType TK_Word ';' final { + if ( ! pd->setAlphType( $2->data ) ) { + // Recover by ignoring the alphtype statement. + error($2->loc) << "\"" << $2->data << + "\" is not a valid alphabet type" << endl; + } + }; + +# Specifies a range to assume that the input characters will fall into. +range_spec: + KW_Range alphabet_num alphabet_num ';' final { + // Save the upper and lower ends of the range and emit the line number. 
+ pd->lowerNum = $2->token.data; + pd->upperNum = $3->token.data; + pd->rangeLowLoc = $2->token.loc; + pd->rangeHighLoc = $3->token.loc; + }; + +getkey_spec: + KW_GetKey inline_expr ';' final { + pd->getKeyExpr = $2->inlineList; + }; + +access_spec: + KW_Access inline_expr ';' final { + pd->accessExpr = $2->inlineList; + }; + +variable_spec: + KW_Variable opt_whitespace TK_Word inline_expr ';' final { + /* FIXME: Need to implement the rest of this. */ + if ( strcmp( $3->data, "curstate" ) == 0 ) + pd->curStateExpr = $4->inlineList; + else { + error($3->loc) << "sorry, unimplementd" << endl; + } + }; + +opt_whitespace: opt_whitespace IL_WhiteSpace; +opt_whitespace: ; + +# +# Expressions +# + +nonterm join_or_lm +{ + JoinOrLm *joinOrLm; +}; + +join_or_lm: + join final { + $$->joinOrLm = new JoinOrLm( $1->join ); + }; +join_or_lm: + TK_BarStar lm_part_list '*' '|' final { + /* Create a new factor going to a longest match structure. Record + * in the parse data that we have a longest match. */ + LongestMatch *lm = new LongestMatch( $1->loc, $2->lmPartList ); + pd->lmList.append( lm ); + for ( LmPartList::Iter lmp = *($2->lmPartList); lmp.lte(); lmp++ ) + lmp->longestMatch = lm; + $$->joinOrLm = new JoinOrLm( lm ); + }; + +nonterm lm_part_list +{ + LmPartList *lmPartList; +}; + +lm_part_list: + lm_part_list longest_match_part final { + if ( $2->lmPart != 0 ) + $1->lmPartList->append( $2->lmPart ); + $$->lmPartList = $1->lmPartList; + }; +lm_part_list: + longest_match_part final { + /* Create a new list with the part. 
*/ + $$->lmPartList = new LmPartList; + if ( $1->lmPart != 0 ) + $$->lmPartList->append( $1->lmPart ); + }; + +nonterm longest_match_part +{ + LongestMatchPart *lmPart; +}; + +longest_match_part: + action_spec final { $$->lmPart = 0; }; +longest_match_part: + assignment final { $$->lmPart = 0; }; +longest_match_part: + join opt_lm_part_action ';' final { + $$->lmPart = 0; + Action *action = $2->action; + if ( action != 0 ) + action->isLmAction = true; + $$->lmPart = new LongestMatchPart( $1->join, action, + $3->loc, pd->nextLongestMatchId++ ); + }; + +nonterm opt_lm_part_action +{ + Action *action; +}; + +opt_lm_part_action: + TK_DoubleArrow action_embed final { + $$->action = $2->action; + }; +opt_lm_part_action: + action_embed_block final { + $$->action = $1->action; + }; +opt_lm_part_action: + final { + $$->action = 0; + }; + + +nonterm join +{ + Join *join; +}; + +join: + join ',' expression final { + /* Append the expression to the list and return it. */ + $1->join->exprList.append( $3->expression ); + $$->join = $1->join; + }; +join: + expression final { + $$->join = new Join( $1->expression ); + }; + +nonterm expression +{ + Expression *expression; +}; + +expression: + expression '|' term final { + $$->expression = new Expression( $1->expression, + $3->term, Expression::OrType ); + }; +expression: + expression '&' term final { + $$->expression = new Expression( $1->expression, + $3->term, Expression::IntersectType ); + }; +expression: + expression pri(1) '-' term final { + $$->expression = new Expression( $1->expression, + $3->term, Expression::SubtractType ); + }; +expression: + expression TK_DashDash term final { + $$->expression = new Expression( $1->expression, + $3->term, Expression::StrongSubtractType ); + }; +expression: + term final { + $$->expression = new Expression( $1->term ); + }; + +nonterm term +{ + Term *term; +}; + +term: + term factor_with_label final { + /* FIXME: Need to reject this if of the form (term . -num). 
*/ + $$->term = new Term( $1->term, $2->factorWithAug ); + }; +term: + term '.' factor_with_label final { + $$->term = new Term( $1->term, $3->factorWithAug ); + }; +term: + term TK_ColonGt factor_with_label final { + $$->term = new Term( $1->term, $3->factorWithAug, Term::RightStartType ); + }; +term: + term TK_ColonGtGt factor_with_label final { + $$->term = new Term( $1->term, $3->factorWithAug, Term::RightFinishType ); + }; +term: + term TK_LtColon factor_with_label final { + $$->term = new Term( $1->term, + $3->factorWithAug, Term::LeftType ); + }; +term: + factor_with_label final { + $$->term = new Term( $1->factorWithAug ); + }; + +nonterm factor_with_label +{ + FactorWithAug *factorWithAug; +}; + +factor_with_label: + TK_Word ':' factor_with_label final { + /* Add the label to the list and pass the factor up. */ + $3->factorWithAug->labels.prepend( Label($1->loc, $1->data) ); + $$->factorWithAug = $3->factorWithAug; + }; +factor_with_label: + factor_with_ep final { + $$->factorWithAug = $1->factorWithAug; + }; + +nonterm factor_with_ep +{ + FactorWithAug *factorWithAug; +}; + +factor_with_ep: + factor_with_ep TK_Arrow local_state_ref final { + /* Add the target to the list and return the factor object. */ + $1->factorWithAug->epsilonLinks.append( EpsilonLink( $2->loc, nameRef ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_ep: + factor_with_aug final { + $$->factorWithAug = $1->factorWithAug; + }; + +nonterm factor_with_aug +{ + FactorWithAug *factorWithAug; +}; + +factor_with_aug: + factor_with_aug aug_type_base action_embed final { + /* Append the action to the factorWithAug, record the refernce from + * factorWithAug to the action and pass up the factorWithAug. */ + $1->factorWithAug->actions.append( + ParserAction( $2->loc, $2->augType, 0, $3->action ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_aug aug_type_base priority_aug final { + /* Append the named priority to the factorWithAug and pass it up. 
*/ + $1->factorWithAug->priorityAugs.append( + PriorityAug( $2->augType, pd->curDefPriorKey, $3->priorityNum ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_aug aug_type_base '(' priority_name ',' priority_aug ')' final { + /* Append the priority using a default name. */ + $1->factorWithAug->priorityAugs.append( + PriorityAug( $2->augType, $4->priorityName, $6->priorityNum ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_aug aug_type_cond action_embed final { + $1->factorWithAug->conditions.append( ParserAction( $2->loc, + $2->augType, 0, $3->action ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_aug aug_type_to_state action_embed final { + /* Append the action, pass it up. */ + $1->factorWithAug->actions.append( ParserAction( $2->loc, + $2->augType, 0, $3->action ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_aug aug_type_from_state action_embed final { + /* Append the action, pass it up. */ + $1->factorWithAug->actions.append( ParserAction( $2->loc, + $2->augType, 0, $3->action ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_aug aug_type_eof action_embed final { + /* Append the action, pass it up. */ + $1->factorWithAug->actions.append( ParserAction( $2->loc, + $2->augType, 0, $3->action ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_aug aug_type_gbl_error action_embed final { + /* Append the action to the factorWithAug, record the refernce from + * factorWithAug to the action and pass up the factorWithAug. 
*/ + $1->factorWithAug->actions.append( ParserAction( $2->loc, + $2->augType, pd->curDefLocalErrKey, $3->action ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_aug aug_type_local_error action_embed final { + /* Append the action to the factorWithAug, record the refernce from + * factorWithAug to the action and pass up the factorWithAug. */ + $1->factorWithAug->actions.append( ParserAction( $2->loc, + $2->augType, pd->curDefLocalErrKey, $3->action ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_aug aug_type_local_error '(' local_err_name ',' action_embed ')' final { + /* Append the action to the factorWithAug, record the refernce from + * factorWithAug to the action and pass up the factorWithAug. */ + $1->factorWithAug->actions.append( ParserAction( $2->loc, + $2->augType, $4->error_name, $6->action ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_rep final { + $$->factorWithAug = new FactorWithAug( $1->factorWithRep ); + }; + +type aug_type +{ + InputLoc loc; + AugType augType; +}; + +# Classes of transtions on which to embed actions or change priorities. +nonterm aug_type_base uses aug_type; + +aug_type_base: '@' final { $$->loc = $1->loc; $$->augType = at_finish; }; +aug_type_base: '%' final { $$->loc = $1->loc; $$->augType = at_leave; }; +aug_type_base: '$' final { $$->loc = $1->loc; $$->augType = at_all; }; +aug_type_base: '>' final { $$->loc = $1->loc; $$->augType = at_start; }; + +# Embedding conditions. 
+nonterm aug_type_cond uses aug_type; + +aug_type_cond: TK_StartCond final { $$->loc = $1->loc; $$->augType = at_start; }; +aug_type_cond: '>' KW_When final { $$->loc = $1->loc; $$->augType = at_start; }; +aug_type_cond: TK_AllCond final { $$->loc = $1->loc; $$->augType = at_all; }; +aug_type_cond: '$' KW_When final { $$->loc = $1->loc; $$->augType = at_all; }; +aug_type_cond: TK_LeavingCond final { $$->loc = $1->loc; $$->augType = at_leave; }; +aug_type_cond: '%' KW_When final { $$->loc = $1->loc; $$->augType = at_leave; }; +aug_type_cond: KW_When final { $$->loc = $1->loc; $$->augType = at_all; }; + +# +# To state actions. +# + +nonterm aug_type_to_state uses aug_type; + +aug_type_to_state: TK_StartToState + final { $$->loc = $1->loc; $$->augType = at_start_to_state; }; +aug_type_to_state: '>' KW_To + final { $$->loc = $1->loc; $$->augType = at_start_to_state; }; + +aug_type_to_state: TK_NotStartToState + final { $$->loc = $1->loc; $$->augType = at_not_start_to_state; }; +aug_type_to_state: '<' KW_To + final { $$->loc = $1->loc; $$->augType = at_not_start_to_state; }; + +aug_type_to_state: TK_AllToState + final { $$->loc = $1->loc; $$->augType = at_all_to_state; }; +aug_type_to_state: '$' KW_To + final { $$->loc = $1->loc; $$->augType = at_all_to_state; }; + +aug_type_to_state: TK_FinalToState + final { $$->loc = $1->loc; $$->augType = at_final_to_state; }; +aug_type_to_state: '%' KW_To + final { $$->loc = $1->loc; $$->augType = at_final_to_state; }; + +aug_type_to_state: TK_NotFinalToState + final { $$->loc = $1->loc; $$->augType = at_not_final_to_state; }; +aug_type_to_state: '@' KW_To + final { $$->loc = $1->loc; $$->augType = at_not_final_to_state; }; + +aug_type_to_state: TK_MiddleToState + final { $$->loc = $1->loc; $$->augType = at_middle_to_state; }; +aug_type_to_state: TK_Middle KW_To + final { $$->loc = $1->loc; $$->augType = at_middle_to_state; }; + +# +# From state actions. 
+# + +nonterm aug_type_from_state uses aug_type; + +aug_type_from_state: TK_StartFromState + final { $$->loc = $1->loc; $$->augType = at_start_from_state; }; +aug_type_from_state: '>' KW_From + final { $$->loc = $1->loc; $$->augType = at_start_from_state; }; + +aug_type_from_state: TK_NotStartFromState + final { $$->loc = $1->loc; $$->augType = at_not_start_from_state; }; +aug_type_from_state: '<' KW_From + final { $$->loc = $1->loc; $$->augType = at_not_start_from_state; }; + +aug_type_from_state: TK_AllFromState + final { $$->loc = $1->loc; $$->augType = at_all_from_state; }; +aug_type_from_state: '$' KW_From + final { $$->loc = $1->loc; $$->augType = at_all_from_state; }; + +aug_type_from_state: TK_FinalFromState + final { $$->loc = $1->loc; $$->augType = at_final_from_state; }; +aug_type_from_state: '%' KW_From + final { $$->loc = $1->loc; $$->augType = at_final_from_state; }; + +aug_type_from_state: TK_NotFinalFromState + final { $$->loc = $1->loc; $$->augType = at_not_final_from_state; }; +aug_type_from_state: '@' KW_From + final { $$->loc = $1->loc; $$->augType = at_not_final_from_state; }; + +aug_type_from_state: TK_MiddleFromState + final { $$->loc = $1->loc; $$->augType = at_middle_from_state; }; +aug_type_from_state: TK_Middle KW_From + final { $$->loc = $1->loc; $$->augType = at_middle_from_state; }; + +# +# Eof state actions. 
+# + +nonterm aug_type_eof uses aug_type; + +aug_type_eof: TK_StartEOF + final { $$->loc = $1->loc; $$->augType = at_start_eof; }; +aug_type_eof: '>' KW_Eof + final { $$->loc = $1->loc; $$->augType = at_start_eof; }; + +aug_type_eof: TK_NotStartEOF + final { $$->loc = $1->loc; $$->augType = at_not_start_eof; }; +aug_type_eof: '<' KW_Eof + final { $$->loc = $1->loc; $$->augType = at_not_start_eof; }; + +aug_type_eof: TK_AllEOF + final { $$->loc = $1->loc; $$->augType = at_all_eof; }; +aug_type_eof: '$' KW_Eof + final { $$->loc = $1->loc; $$->augType = at_all_eof; }; + +aug_type_eof: TK_FinalEOF + final { $$->loc = $1->loc; $$->augType = at_final_eof; }; +aug_type_eof: '%' KW_Eof + final { $$->loc = $1->loc; $$->augType = at_final_eof; }; + +aug_type_eof: TK_NotFinalEOF + final { $$->loc = $1->loc; $$->augType = at_not_final_eof; }; +aug_type_eof: '@' KW_Eof + final { $$->loc = $1->loc; $$->augType = at_not_final_eof; }; + +aug_type_eof: TK_MiddleEOF + final { $$->loc = $1->loc; $$->augType = at_middle_eof; }; +aug_type_eof: TK_Middle KW_Eof + final { $$->loc = $1->loc; $$->augType = at_middle_eof; }; + +# +# Global error actions. 
+# + +nonterm aug_type_gbl_error uses aug_type; + +aug_type_gbl_error: TK_StartGblError + final { $$->loc = $1->loc; $$->augType = at_start_gbl_error; }; +aug_type_gbl_error: '>' KW_Err + final { $$->loc = $1->loc; $$->augType = at_start_gbl_error; }; + +aug_type_gbl_error: TK_NotStartGblError + final { $$->loc = $1->loc; $$->augType = at_not_start_gbl_error; }; +aug_type_gbl_error: '<' KW_Err + final { $$->loc = $1->loc; $$->augType = at_not_start_gbl_error; }; + +aug_type_gbl_error: TK_AllGblError + final { $$->loc = $1->loc; $$->augType = at_all_gbl_error; }; +aug_type_gbl_error: '$' KW_Err + final { $$->loc = $1->loc; $$->augType = at_all_gbl_error; }; + +aug_type_gbl_error: TK_FinalGblError + final { $$->loc = $1->loc; $$->augType = at_final_gbl_error; }; +aug_type_gbl_error: '%' KW_Err + final { $$->loc = $1->loc; $$->augType = at_final_gbl_error; }; + +aug_type_gbl_error: TK_NotFinalGblError + final { $$->loc = $1->loc; $$->augType = at_not_final_gbl_error; }; +aug_type_gbl_error: '@' KW_Err + final { $$->loc = $1->loc; $$->augType = at_not_final_gbl_error; }; + +aug_type_gbl_error: TK_MiddleGblError + final { $$->loc = $1->loc; $$->augType = at_middle_gbl_error; }; +aug_type_gbl_error: TK_Middle KW_Err + final { $$->loc = $1->loc; $$->augType = at_middle_gbl_error; }; + + +# +# Local error actions. 
+# + +nonterm aug_type_local_error uses aug_type; + +aug_type_local_error: TK_StartLocalError + final { $$->loc = $1->loc; $$->augType = at_start_local_error; }; +aug_type_local_error: '>' KW_Lerr + final { $$->loc = $1->loc; $$->augType = at_start_local_error; }; + +aug_type_local_error: TK_NotStartLocalError + final { $$->loc = $1->loc; $$->augType = at_not_start_local_error; }; +aug_type_local_error: '<' KW_Lerr + final { $$->loc = $1->loc; $$->augType = at_not_start_local_error; }; + +aug_type_local_error: TK_AllLocalError + final { $$->loc = $1->loc; $$->augType = at_all_local_error; }; +aug_type_local_error: '$' KW_Lerr + final { $$->loc = $1->loc; $$->augType = at_all_local_error; }; + +aug_type_local_error: TK_FinalLocalError + final { $$->loc = $1->loc; $$->augType = at_final_local_error; }; +aug_type_local_error: '%' KW_Lerr + final { $$->loc = $1->loc; $$->augType = at_final_local_error; }; + +aug_type_local_error: TK_NotFinalLocalError + final { $$->loc = $1->loc; $$->augType = at_not_final_local_error; }; +aug_type_local_error: '@' KW_Lerr + final { $$->loc = $1->loc; $$->augType = at_not_final_local_error; }; + +aug_type_local_error: TK_MiddleLocalError + final { $$->loc = $1->loc; $$->augType = at_middle_local_error; }; +aug_type_local_error: TK_Middle KW_Lerr + final { $$->loc = $1->loc; $$->augType = at_middle_local_error; }; + + +type action_ref +{ + Action *action; +}; + +# Different ways to embed actions. A TK_Word is reference to an action given by +# the user as a statement in the fsm specification. An action can also be +# specified immediately. +nonterm action_embed uses action_ref; + +action_embed: action_embed_word final { $$->action = $1->action; }; +action_embed: action_embed_block final { $$->action = $1->action; }; + +nonterm action_embed_word uses action_ref; + +action_embed_word: + TK_Word final { + /* Set the name in the actionDict. 
*/ + Action *action = pd->actionDict.find( $1->data ); + if ( action != 0 ) { + /* Pass up the action element */ + $$->action = action; + } + else { + /* Will recover by returning null as the action. */ + error($1->loc) << "action lookup of \"" << $1->data << "\" failed" << endl; + $$->action = 0; + } + }; + +nonterm action_embed_block uses action_ref; + +action_embed_block: + '{' inline_block '}' final { + /* Create the action, add it to the list and pass up. */ + Action *newAction = new Action( $1->loc, 0, $2->inlineList ); + pd->actionList.append( newAction ); + $$->action = newAction; + }; + +nonterm priority_name +{ + int priorityName; +}; + +# A specified priority name. Looks up the name in the current priority +# dictionary. +priority_name: + TK_Word final { + // Lookup/create the priority key. + PriorDictEl *priorDictEl; + if ( pd->priorDict.insert( $1->data, pd->nextPriorKey, &priorDictEl ) ) + pd->nextPriorKey += 1; + + // Use the inserted/found priority key. + $$->priorityName = priorDictEl->value; + }; + +nonterm priority_aug +{ + int priorityNum; +}; + +# Priority change specs. +priority_aug: + priority_aug_num final { + // Convert the priority number to a long. Check for overflow. + errno = 0; + //cerr << "PRIOR AUG: " << $1->token.data << endl; + int aug = strtol( $1->token.data, 0, 10 ); + if ( errno == ERANGE && aug == LONG_MAX ) { + /* Priority number too large. Recover by setting the priority to 0. */ + error($1->token.loc) << "priority number " << $1->token.data << + " overflows" << endl; + $$->priorityNum = 0; + } + else if ( errno == ERANGE && aug == LONG_MIN ) { + /* Priority number too large in the neg. Recover by using 0. */ + error($1->token.loc) << "priority number " << $1->token.data << + " underflows" << endl; + $$->priorityNum = 0; + } + else { + /* No overflow or underflow. 
*/ + $$->priorityNum = aug; + } + }; + +nonterm priority_aug_num uses token_type; + +priority_aug_num: + TK_UInt final { + $$->token = *$1; + }; +priority_aug_num: + '+' TK_UInt final { + $$->token.set( "+", 1 ); + $$->token.loc = $1->loc; + $$->token.append( *$2 ); + }; +priority_aug_num: + '-' TK_UInt final { + $$->token.set( "-", 1 ); + $$->token.loc = $1->loc; + $$->token.append( *$2 ); + }; + +nonterm local_err_name +{ + int error_name; +}; + +local_err_name: + TK_Word final { + /* Lookup/create the priority key. */ + LocalErrDictEl *localErrDictEl; + if ( pd->localErrDict.insert( $1->data, pd->nextLocalErrKey, &localErrDictEl ) ) + pd->nextLocalErrKey += 1; + + /* Use the inserted/found priority key. */ + $$->error_name = localErrDictEl->value; + }; + + + +# The fourth level of precedence. These are the trailing unary operators that +# allow for repetition. + +nonterm factor_with_rep +{ + FactorWithRep *factorWithRep; +}; + +factor_with_rep: + factor_with_rep '*' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::StarType ); + }; +factor_with_rep: + factor_with_rep TK_StarStar final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::StarStarType ); + }; +factor_with_rep: + factor_with_rep '?' 
final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::OptionalType ); + }; +factor_with_rep: + factor_with_rep '+' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::PlusType ); + }; +factor_with_rep: + factor_with_rep '{' factor_rep_num '}' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + $3->rep, 0, FactorWithRep::ExactType ); + }; +factor_with_rep: + factor_with_rep '{' ',' factor_rep_num '}' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, $4->rep, FactorWithRep::MaxType ); + }; +factor_with_rep: + factor_with_rep '{' factor_rep_num ',' '}' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + $3->rep, 0, FactorWithRep::MinType ); + }; +factor_with_rep: + factor_with_rep '{' factor_rep_num ',' factor_rep_num '}' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + $3->rep, $5->rep, FactorWithRep::RangeType ); + }; +factor_with_rep: + factor_with_neg final { + $$->factorWithRep = new FactorWithRep( + $1->factorWithNeg->loc, $1->factorWithNeg ); + }; + +nonterm factor_rep_num +{ + int rep; +}; + +factor_rep_num: + TK_UInt final { + // Convert the priority number to a long. Check for overflow. + errno = 0; + int rep = strtol( $1->data, 0, 10 ); + if ( errno == ERANGE && rep == LONG_MAX ) { + // Repetition too large. Recover by returing repetition 1. */ + error($1->loc) << "repetition number " << $1->data << " overflows" << endl; + $$->rep = 1; + } + else { + // Cannot be negative, so no overflow. + $$->rep = rep; + } + }; + + +# +# The fifth level up in precedence. Negation. +# + +nonterm factor_with_neg +{ + FactorWithNeg *factorWithNeg; +}; + +factor_with_neg: + '!' 
factor_with_neg final { + $$->factorWithNeg = new FactorWithNeg( $1->loc, + $2->factorWithNeg, FactorWithNeg::NegateType ); + }; +factor_with_neg: + '^' factor_with_neg final { + $$->factorWithNeg = new FactorWithNeg( $1->loc, + $2->factorWithNeg, FactorWithNeg::CharNegateType ); + }; +factor_with_neg: + factor final { + $$->factorWithNeg = new FactorWithNeg( $1->factor->loc, $1->factor ); + }; + +nonterm factor +{ + Factor *factor; +}; + +factor: + TK_Literal final { + /* Create a new factor node going to a concat literal. */ + $$->factor = new Factor( new Literal( *$1, Literal::LitString ) ); + }; +factor: + alphabet_num final { + /* Create a new factor node going to a literal number. */ + $$->factor = new Factor( new Literal( $1->token, Literal::Number ) ); + }; +factor: + TK_Word final { + /* Find the named graph. */ + GraphDictEl *gdNode = pd->graphDict.find( $1->data ); + if ( gdNode == 0 ) { + /* Recover by returning null as the factor node. NOTE(review): the factor_with_neg rule dereferences the factor pointer to read its loc without a null check; confirm a null factor cannot reach it. */ + error($1->loc) << "graph lookup of \"" << $1->data << "\" failed" << endl; + $$->factor = 0; + } + else if ( gdNode->isInstance ) { + /* Recover by returning null as the factor node. */ + error($1->loc) << "references to graph instantiations not allowed " + "in expressions" << endl; + $$->factor = 0; + } + else { + /* Create a factor node that is a lookup of an expression. */ + $$->factor = new Factor( $1->loc, gdNode->value ); + } + }; +factor: + RE_SqOpen regular_expr_or_data RE_SqClose final { + /* Create a new factor node going to an OR expression. */ + $$->factor = new Factor( new ReItem( $1->loc, $2->reOrBlock, ReItem::OrBlock ) ); + }; +factor: + RE_SqOpenNeg regular_expr_or_data RE_SqClose final { + /* Create a new factor node going to a negated OR expression. 
*/ + $$->factor = new Factor( new ReItem( $1->loc, $2->reOrBlock, ReItem::NegOrBlock ) ); + }; +factor: + RE_Slash regular_expr RE_Slash final { + if ( $3->length > 1 ) { + for ( char *p = $3->data; *p != 0; p++ ) { + if ( *p == 'i' ) + $2->regExpr->caseInsensitive = true; + } + } + + /* Create a new factor node going to a regular exp. */ + $$->factor = new Factor( $2->regExpr ); + }; +factor: + range_lit TK_DotDot range_lit final { + /* Create a new factor node going to a range. */ + $$->factor = new Factor( new Range( $1->literal, $3->literal ) ); + }; +factor: + '(' join ')' final { + /* Create a new factor going to a parenthesized join. */ + $$->factor = new Factor( $2->join ); + }; + +nonterm range_lit +{ + Literal *literal; +}; + +# Literals which can be the end points of ranges. +range_lit: + TK_Literal final { + /* Range literas must have only one char. We restrict this in the parse tree. */ + $$->literal = new Literal( *$1, Literal::LitString ); + }; +range_lit: + alphabet_num final { + /* Create a new literal number. */ + $$->literal = new Literal( $1->token, Literal::Number ); + }; + +nonterm alphabet_num uses token_type; + +# Any form of a number that can be used as a basic machine. */ +alphabet_num: + TK_UInt final { + $$->token = *$1; + }; +alphabet_num: + '-' TK_UInt final { + $$->token.set( "-", 1 ); + $$->token.loc = $1->loc; + $$->token.append( *$2 ); + }; +alphabet_num: + TK_Hex final { + $$->token = *$1; + }; +# +# Regular Expressions. +# + +nonterm regular_expr +{ + RegExpr *regExpr; +}; + +# Parser for regular expression fsms. Any number of expression items which +# generally gives a machine one character long or one character long stared. +regular_expr: + regular_expr regular_expr_item final { + /* An optimization to lessen the tree size. If a non-starred char is + * directly under the left side on the right and the right side is + * another non-starred char then paste them together and return the + * left side. 
Otherwise just put the two under a new reg exp node. */ + if ( $2->reItem->type == ReItem::Data && !$2->reItem->star && + $1->regExpr->type == RegExpr::RecurseItem && + $1->regExpr->item->type == ReItem::Data && !$1->regExpr->item->star ) + { + /* Append the right side to the right side of the left and toss the + * right side. */ + $1->regExpr->item->token.append( $2->reItem->token ); + delete $2->reItem; + $$->regExpr = $1->regExpr; + } + else { + $$->regExpr = new RegExpr( $1->regExpr, $2->reItem ); + } + }; +regular_expr: + final { + /* Can't optimize the tree. */ + $$->regExpr = new RegExpr(); + }; + +nonterm regular_expr_item +{ + ReItem *reItem; +}; + +# RegularExprItems can be a character spec with an optional staring of the char. +regular_expr_item: + regular_expr_char RE_Star final { + $1->reItem->star = true; + $$->reItem = $1->reItem; + }; +regular_expr_item: + regular_expr_char final { + $$->reItem = $1->reItem; + }; + +nonterm regular_expr_char +{ + ReItem *reItem; +}; + +# A character spec can be a set of characters inside of square parenthesis, a +# dot specifying any character or some explicitly stated character. +regular_expr_char: + RE_SqOpen regular_expr_or_data RE_SqClose final { + $$->reItem = new ReItem( $1->loc, $2->reOrBlock, ReItem::OrBlock ); + }; +regular_expr_char: + RE_SqOpenNeg regular_expr_or_data RE_SqClose final { + $$->reItem = new ReItem( $1->loc, $2->reOrBlock, ReItem::NegOrBlock ); + }; +regular_expr_char: + RE_Dot final { + $$->reItem = new ReItem( $1->loc, ReItem::Dot ); + }; +regular_expr_char: + RE_Char final { + $$->reItem = new ReItem( $1->loc, *$1 ); + }; + +# The data inside of a [] expression in a regular expression. Accepts any +# number of characters or ranges. */ +nonterm regular_expr_or_data +{ + ReOrBlock *reOrBlock; +}; + +regular_expr_or_data: + regular_expr_or_data regular_expr_or_char final { + /* An optimization to lessen the tree size. 
If an or char is directly + * under the left side on the right and the right side is another or + * char then paste them together and return the left side. Otherwise + * just put the two under a new or data node. */ + if ( $2->reOrItem->type == ReOrItem::Data && + $1->reOrBlock->type == ReOrBlock::RecurseItem && + $1->reOrBlock->item->type == ReOrItem::Data ) + { + /* Append the right side to right side of the left and toss the + * right side. */ + $1->reOrBlock->item->token.append( $2->reOrItem->token ); + delete $2->reOrItem; + $$->reOrBlock = $1->reOrBlock; + } + else { + /* Can't optimize, put the left and right under a new node. */ + $$->reOrBlock = new ReOrBlock( $1->reOrBlock, $2->reOrItem ); + } + }; +regular_expr_or_data: + final { + $$->reOrBlock = new ReOrBlock(); + }; + +# A single character inside of an or expression. Can either be a character or a +# set of characters. +nonterm regular_expr_or_char +{ + ReOrItem *reOrItem; +}; + +regular_expr_or_char: + RE_Char final { + $$->reOrItem = new ReOrItem( $1->loc, *$1 ); + }; +regular_expr_or_char: + RE_Char RE_Dash RE_Char final { + $$->reOrItem = new ReOrItem( $2->loc, $1->data[0], $3->data[0] ); + }; + +# +# Inline Lists for inline host code. +# + +type inline_list +{ + InlineList *inlineList; +}; + +nonterm inline_block uses inline_list; + +inline_block: + inline_block inline_block_item + final { + /* Append the item to the list, return the list. */ + $$->inlineList = $1->inlineList; + $$->inlineList->append( $2->inlineItem ); + }; + +inline_block: + final { + /* Start with empty list. 
*/ + $$->inlineList = new InlineList; + }; + +type inline_item +{ + InlineItem *inlineItem; +}; + +nonterm inline_block_item uses inline_item; +nonterm inline_block_interpret uses inline_item; + +inline_block_item: + inline_expr_any + final { + $$->inlineItem = new InlineItem( $1->token.loc, $1->token.data, InlineItem::Text ); + }; + +inline_block_item: + inline_block_symbol + final { + $$->inlineItem = new InlineItem( $1->token.loc, $1->token.data, InlineItem::Text ); + }; + +inline_block_item: + inline_block_interpret + final { + /* Pass the inline item up. */ + $$->inlineItem = $1->inlineItem; + }; + +nonterm inline_block_symbol uses token_type; + +inline_block_symbol: ',' final { $$->token = *$1; }; +inline_block_symbol: ';' final { $$->token = *$1; }; +inline_block_symbol: '(' final { $$->token = *$1; }; +inline_block_symbol: ')' final { $$->token = *$1; }; +inline_block_symbol: '*' final { $$->token = *$1; }; +inline_block_symbol: TK_NameSep final { $$->token = *$1; }; + +# Interpreted statements in a struct block. */ +inline_block_interpret: + inline_expr_interpret final { + /* Pass up interpreted items of inline expressions. 
*/ + $$->inlineItem = $1->inlineItem; + }; +inline_block_interpret: + KW_Hold ';' final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::Hold ); + }; +inline_block_interpret: + KW_Exec inline_expr ';' final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::Exec ); + $$->inlineItem->children = $2->inlineList; + }; +inline_block_interpret: + KW_Goto state_ref ';' final { + $$->inlineItem = new InlineItem( $1->loc, + new NameRef(nameRef), InlineItem::Goto ); + }; +inline_block_interpret: + KW_Goto '*' inline_expr ';' final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::GotoExpr ); + $$->inlineItem->children = $3->inlineList; + }; +inline_block_interpret: + KW_Next state_ref ';' final { + $$->inlineItem = new InlineItem( $1->loc, new NameRef(nameRef), InlineItem::Next ); + }; +inline_block_interpret: + KW_Next '*' inline_expr ';' final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::NextExpr ); + $$->inlineItem->children = $3->inlineList; + }; +inline_block_interpret: + KW_Call state_ref ';' final { + $$->inlineItem = new InlineItem( $1->loc, new NameRef(nameRef), InlineItem::Call ); + }; +inline_block_interpret: + KW_Call '*' inline_expr ';' final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::CallExpr ); + $$->inlineItem->children = $3->inlineList; + }; +inline_block_interpret: + KW_Ret ';' final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::Ret ); + }; +inline_block_interpret: + KW_Break ';' final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::Break ); + }; + +nonterm inline_expr uses inline_list; + +inline_expr: + inline_expr inline_expr_item + final { + $$->inlineList = $1->inlineList; + $$->inlineList->append( $2->inlineItem ); + }; +inline_expr: + final { + /* Init the list used for this expr. */ + $$->inlineList = new InlineList; + }; + +nonterm inline_expr_item uses inline_item; + +inline_expr_item: + inline_expr_any + final { + /* Return a text segment. 
*/ + $$->inlineItem = new InlineItem( $1->token.loc, $1->token.data, InlineItem::Text ); + }; +inline_expr_item: + inline_expr_symbol + final { + /* Return a text segment, must heap alloc the text. */ + $$->inlineItem = new InlineItem( $1->token.loc, $1->token.data, InlineItem::Text ); + }; +inline_expr_item: + inline_expr_interpret + final{ + /* Pass the inline item up. */ + $$->inlineItem = $1->inlineItem; + }; + +nonterm inline_expr_any uses token_type; + +inline_expr_any: IL_WhiteSpace try { $$->token = *$1; }; +inline_expr_any: IL_Comment try { $$->token = *$1; }; +inline_expr_any: IL_Literal try { $$->token = *$1; }; +inline_expr_any: IL_Symbol try { $$->token = *$1; }; +inline_expr_any: TK_UInt try { $$->token = *$1; }; +inline_expr_any: TK_Hex try { $$->token = *$1; }; +inline_expr_any: TK_Word try { $$->token = *$1; }; + +# Anything in a ExecValExpr that is not dynamically allocated. This includes +# all special symbols caught in inline code except the semi. + +nonterm inline_expr_symbol uses token_type; + +inline_expr_symbol: ',' try { $$->token = *$1; }; +inline_expr_symbol: '(' try { $$->token = *$1; }; +inline_expr_symbol: ')' try { $$->token = *$1; }; +inline_expr_symbol: '*' try { $$->token = *$1; }; +inline_expr_symbol: TK_NameSep try { $$->token = *$1; }; + +nonterm inline_expr_interpret uses inline_item; + +inline_expr_interpret: + KW_PChar + final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::PChar ); + }; +inline_expr_interpret: + KW_Char + final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::Char ); + }; +inline_expr_interpret: + KW_CurState + final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::Curs ); + }; +inline_expr_interpret: + KW_TargState + final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::Targs ); + }; +inline_expr_interpret: + KW_Entry '(' state_ref ')' + final { + $$->inlineItem = new InlineItem( $1->loc, + new NameRef(nameRef), InlineItem::Entry ); + }; + +# A local state reference. 
Cannot have :: prefix. +local_state_ref: + no_name_sep state_ref_names; + +# Clear the name ref structure. +no_name_sep: + final { + nameRef.empty(); + }; + +# A qualified state reference. +state_ref: opt_name_sep state_ref_names; + +# Optional leading name separator. +opt_name_sep: + TK_NameSep + final { + /* Insert an initial null pointer val to indicate the existence of the + * initial name seperator. */ + nameRef.setAs( 0 ); + }; +opt_name_sep: + final { + nameRef.empty(); + }; + +# List of names separated by :: +state_ref_names: + state_ref_names TK_NameSep TK_Word + final { + nameRef.append( $3->data ); + }; +state_ref_names: + TK_Word + final { + nameRef.append( $1->data ); + }; + +}%% + +void Parser::tryMachineDef( InputLoc &loc, char *name, + JoinOrLm *joinOrLm, bool isInstance ) +{ + GraphDictEl *newEl = pd->graphDict.insert( name ); + if ( newEl != 0 ) { + /* New element in the dict, all good. */ + newEl->value = new VarDef( name, joinOrLm ); + newEl->isInstance = isInstance; + newEl->loc = loc; + + /* It it is an instance, put on the instance list. */ + if ( isInstance ) + pd->instanceList.append( newEl ); + } + else { + // Recover by ignoring the duplicate. 
+ error(loc) << "fsm \"" << name << "\" previously defined" << endl; + } +} + +ostream &error( const InputLoc &loc ) +{ + gblErrorCount += 1; + assert( loc.fileName != 0 ); + cerr << loc.fileName << ":" << loc.line << ": "; + return cerr; +} + +ostream &Parser::parser_error( int tokId, Token &token ) +{ + gblErrorCount += 1; + + cerr << token.loc.fileName << ":" << token.loc.line << ": "; + cerr << "at token "; + if ( tokId < 128 ) + cerr << "\"" << lelNames[tokId] << "\""; + else + cerr << lelNames[tokId]; + if ( token.data != 0 ) + cerr << " with data \"" << token.data << "\""; + cerr << ": "; + + return cerr; +} + +int Parser::token( InputLoc &loc, int tokId, char *tokstart, int toklen ) +{ + Token token; + token.data = tokstart; + token.length = toklen; + token.loc = loc; + int res = parseLangEl( tokId, token ); + if ( res < 0 ) { + parser_error(tokId, token) << "parse error" << endl; + exit(1); + } + return res; +} diff --git a/ragel/rlparse.y b/ragel/rlparse.y new file mode 100644 index 0000000..b0fc3df --- /dev/null +++ b/ragel/rlparse.y @@ -0,0 +1,1456 @@ +/* + * Copyright 2001-2005 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +%{ + +#include <iostream> +#include <stdlib.h> +#include <limits.h> +#include <errno.h> +#include "ragel.h" +#include "parsetree.h" +#include "rlparse.h" + +using std::cerr; +using std::endl; + +InputData *id = 0; +int includeDepth = 0; + +extern bool inlineWhitespace; + +/* These come from the scanner and point back into the parser. We will borrow + * them for error reporting. */ +extern YYSTYPE *yylval; +extern YYLTYPE *yylloc; + +/* The include stack pointer from the scanner. Used to determine if we are + * currently processing an included file. */ +extern int inc_stack_ptr; + +/* Try to do a definition, common to assignment and instantiation. */ +void tryMachineDef( const YYLTYPE &loc, char *name, + JoinOrLm *joinOrLm, bool isInstance ); +void beginOutsideCode(); +void doInclude( const InputLoc &loc, char *sectionName, char *inputFile ); +int yylex( YYSTYPE *yylval, YYLTYPE *yylloc ); + +bool sectionOpened; +void openSection(); + +#define WO_NOEND 0x01 + +%} + +%pure-parser + +%union { + /* General data types. */ + char c; + TokenData data; + int integer; + Literal *literal; + + /* Tree nodes. */ + Term *term; + FactorWithAug *factorWithAug; + FactorWithRep *factorWithRep; + FactorWithNeg *factorWithNeg; + Factor *factor; + Expression *expression; + Join *join; + JoinOrLm *joinOrLm; + LmPartList *longestMatchList; + LongestMatchPart *longestMatchPart; + + /* Priorities and actions. */ + AugType augType; + StateAugType stateAugType; + Action *action; + PriorDesc *priorDesc; + + /* Regular expression items. */ + RegExpr *regExp; + ReItem *reItem; + ReOrBlock *reOrBlock; + ReOrItem *reOrItem; + + /* Inline parse tree items. */ + InlineItem *ilitem; + InlineList *illist; +} + +%token TK_Section +%token TK_SectionNL + +/* General tokens. 
*/ +%token <data> TK_UInt +%token <data> TK_Hex +%token <data> TK_Word +%token <data> TK_Literal +%token <data> TK_CiLiteral +%token <data> TK_BaseClause +%token TK_DotDot +%token TK_ColonGt +%token TK_ColonGtGt +%token TK_LtColon +%token TK_Arrow +%token TK_DoubleArrow +%token TK_StarStar +%token TK_ColonEquals +%token TK_NameSep +%token TK_BarStar +%token TK_RepOpOpen +%token TK_DashDash + +%token TK_StartCond +%token TK_AllCond +%token TK_LeavingCond + +%token TK_Middle + +/* Global error actions. */ +%token TK_StartGblError +%token TK_AllGblError +%token TK_FinalGblError +%token TK_NotFinalGblError +%token TK_NotStartGblError +%token TK_MiddleGblError + +/* Local error actions. */ +%token TK_StartLocalError +%token TK_AllLocalError +%token TK_FinalLocalError +%token TK_NotFinalLocalError +%token TK_NotStartLocalError +%token TK_MiddleLocalError + +/* EOF Action embedding. */ +%token TK_StartEOF +%token TK_AllEOF +%token TK_FinalEOF +%token TK_NotFinalEOF +%token TK_NotStartEOF +%token TK_MiddleEOF + +/* To State Actions. */ +%token TK_StartToState +%token TK_AllToState +%token TK_FinalToState +%token TK_NotFinalToState +%token TK_NotStartToState +%token TK_MiddleToState + +/* In State Actions. */ +%token TK_StartFromState +%token TK_AllFromState +%token TK_FinalFromState +%token TK_NotFinalFromState +%token TK_NotStartFromState +%token TK_MiddleFromState + +/* Regular expression tokens. */ +%token <data> RE_Slash +%token RE_SqOpen +%token RE_SqOpenNeg +%token RE_SqClose +%token RE_Dot +%token RE_Star +%token RE_Dash +%token <data> RE_Char + +/* Tokens specific to inline code. */ +%token <data> IL_WhiteSpace +%token <data> IL_Comment +%token <data> IL_Literal +%token <data> IL_Symbol + +/* Keywords. */ +%token KW_Action +%token KW_AlphType +%token KW_Range +%token KW_GetKey +%token KW_Include +%token KW_Write +%token KW_Machine +%token KW_When +%token KW_Eof +%token KW_Err +%token KW_Lerr +%token KW_To +%token KW_From + +/* Specials in code blocks. 
*/
%token KW_Break
%token KW_Exec
%token KW_Hold
%token KW_PChar
%token KW_Char
%token KW_Goto
%token KW_Call
%token KW_Ret
%token KW_CurState
%token KW_TargState
%token KW_Entry
%token KW_Next
/* KW_Exec is declared once, above. A second %token line for the same symbol
 * is a redeclaration and adds nothing. */
%token<data> KW_Variable
%token KW_Access

/* Special token for terminating semi-terminated code blocks. Needed because
 * semi is sent as a token in the code block rather than as a generic symbol. */
%token TK_Semi

/* Symbols. In ragel lexical space, the scanner does not pass
 * any data along with the symbols, in inline code lexical
 * space it does. */
%token '*' '?' '+' '!' '^' '(' ')' ';' ',' '='
%token ':' '@' '%' '$' '-' '|' '&' '.' '>'

/* Precedence information. Lower is a higher precedence. We need only two
 * precedence groups. Shifting the minus sign in front of a literal number
 * conflicts with the reduction of Expression and the subsequent shifting of a
 * subtraction operator when a '-' is seen. Since we want subtraction to take
 * precedence, we give EXPR_MINUS the higher priority.
*/ +%nonassoc '-' +%nonassoc EXPR_MINUS + +%type <augType> AugTypeBase +%type <augType> AugTypeGblError +%type <augType> AugTypeLocalError +%type <augType> AugTypeEOF +%type <augType> AugTypeToState +%type <augType> AugTypeFromState +%type <augType> AugTypeCond +%type <integer> PriorityAug +%type <data> PriorityAugNum +%type <action> ActionEmbed +%type <action> ActionEmbedWord +%type <action> ActionEmbedBlock +%type <action> OptLmPartAction +%type <longestMatchList> LmPartList +%type <longestMatchPart> LongestMatchPart +%type <join> Join +%type <joinOrLm> JoinOrLm +%type <expression> Expression +%type <term> Term +%type <factorWithAug> FactorWithLabel +%type <factorWithAug> FactorWithEp +%type <factorWithAug> FactorWithAug +%type <factorWithAug> FactorWithTransAction +%type <factorWithAug> FactorWithPriority +%type <factorWithAug> FactorWithCond +%type <factorWithAug> FactorWithToStateAction +%type <factorWithAug> FactorWithFromStateAction +%type <factorWithAug> FactorWithEOFAction +%type <factorWithAug> FactorWithGblErrorAction +%type <factorWithAug> FactorWithLocalErrorAction +%type <factorWithRep> FactorWithRep +%type <integer> FactorRepNum +%type <factorWithNeg> FactorWithNeg +%type <factor> Factor +%type <literal> RangeLit +%type <data> AlphabetNum +%type <data> MachineName +%type <integer> PriorityName +%type <integer> LocalErrName +%type <data> SectionName +%type <data> OptSection +%type <data> OptFileName +%type <integer> EndSection + +%type <illist> InlineBlock +%type <ilitem> InlineBlockItem +%type <ilitem> InlineBlockInterpret +%type <data> InlineBlockAny +%type <data> InlineBlockSymbol + +%type <illist> InlineExpr +%type <ilitem> InlineExprItem +%type <ilitem> InlineExprInterpret +%type <data> InlineExprSymbol +%type <data> InlineExprAny + +%type <regExp> RegularExpr +%type <reItem> RegularExprItem +%type <reItem> RegularExprChar +%type <reOrBlock> RegularExprOrData +%type <reOrItem> RegularExprOrChar + +%% + +/* Input is any number of input sections. 
An empty file is accepted. */ +input: FsmSpecList; +FsmSpecList: + FsmSpecList FsmSpec | + /* Nothing */; + +/* Fsm Specification. Fsms are begin with '%%' and may be a {} delimited + * list of Fsm statements or may be a single statement. If no name is + * given the last name given in a machine is used. */ +FsmSpec: + StartSection SectionName StatementList EndSection { + if ( includeDepth == 0 ) { + if ( sectionOpened ) + *outStream << "</ragel_def>\n"; + + if ( machineSpec == 0 && machineName == 0 ) { + /* The end section may include a newline on the end, so + * we use the last line, which will count the newline. */ + *outStream << "<host line=\"" << $4 << "\">"; + } + } + }; + +StartSection: + TK_Section { + id->sectionLoc = InputLoc(@1); + + if ( includeDepth == 0 ) { + if ( machineSpec == 0 && machineName == 0 ) + *outStream << "</host>\n"; + sectionOpened = false; + } + }; + +SectionName: + KW_Machine TK_Word ';' { + /* By default active until found not active. */ + id->active = true; + id->sectionName = $2.data; + + if ( id->includeSpec != 0 ) { + if ( strcmp( id->sectionName, id->includeSpec ) == 0 ) + id->sectionName = id->includeTo; + else + id->active = false; + } + + /* Lookup the parse data, if it is not there then create it. */ + SectionMapEl *sectionMapEl = sectionMap.find( id->sectionName ); + if ( sectionMapEl == 0 ) { + ParseData *newPd = new ParseData( id->fileName, id->sectionName, + id->sectionLoc ); + sectionMapEl = sectionMap.insert( id->sectionName, newPd ); + } + id->pd = sectionMapEl->value; + } | + /* Empty */ { + /* No machine name. Just use the previous section setup. 
Report an + * error if there is no previous section */ + if ( id->pd == 0 ) { + error(id->sectionLoc) << "the first ragel section does not have a name" << endl; + id->pd = new ParseData( id->fileName, "<DUMMY>", id->sectionLoc ); + } + }; + +EndSection: + TK_Section { $$ = @1.last_line; } | + TK_SectionNL { $$ = @1.last_line + 1; }; + +/* A NonEmpty list of statements in a fsm. */ +StatementList: + StatementList Statement | + /* Nothing */; + +/* The differnt types of statements in a fsm spec. */ +Statement: + Assignment | + Instantiation | + ActionSpec | + AlphSpec | + GetKeySpec | + RangeSpec | + Include | + Write | + Access | + Variable; + +/* Garble up to the next ; */ +Statement: error ';' { yyerrok; }; + +/* Allow the user to create a named fsm action that can be referenced when + * building a machine. */ +ActionSpec: + KW_Action TK_Word '{' InlineBlock '}' { + if ( id->active ) { + if ( id->pd->actionDict.find( $2.data ) ) { + /* Recover by just ignoring the duplicate. */ + error(@2) << "action \"" << $2.data << "\" already defined" << endl; + } + else { + /* Add the action to the list of actions. */ + Action *newAction = new Action( InputLoc(@3), $2.data, $4, id->nameRefList ); + + /* Insert to list and dict. */ + id->pd->actionList.append( newAction ); + id->pd->actionDict.insert( newAction ); + } + } + }; + +/* Specifies the data type of the input alphabet. One or two words + * followed by a semi-colon. */ +AlphSpec: + KW_AlphType TK_Word TK_Word TK_Semi { + if ( id->active ) { + if ( ! id->pd->setAlphType( $2.data, $3.data ) ) { + // Recover by ignoring the alphtype statement. + error(@2) << "\"" << $2.data << + " " << $3.data << "\" is not a valid alphabet type" << endl; + } + } + } | + KW_AlphType TK_Word TK_Semi { + if ( id->active ) { + if ( ! id->pd->setAlphType( $2.data ) ) { + // Recover by ignoring the alphtype statement. 
+ error(@2) << "\"" << $2.data << "\" is not a valid alphabet type" << endl; + } + } + }; + +GetKeySpec: + KW_GetKey InlineBlock TK_Semi { + if ( id->active ) + id->pd->getKeyExpr = $2; + }; + +/* Specifies a range to assume that the input characters will fall into. */ +RangeSpec: + KW_Range AlphabetNum AlphabetNum ';' { + if ( id->active ) { + // Save the upper and lower ends of the range and emit the line number. + id->pd->lowerNum = $2.data; + id->pd->upperNum = $3.data; + id->pd->rangeLowLoc = InputLoc(@2); + id->pd->rangeHighLoc = InputLoc(@3); + } + }; + + +Write: + WriteOpen WriteOptions ';' { + if ( id->active ) + *outStream << "</write>\n"; + }; + +WriteOpen: + KW_Write TK_Word { + if ( id->active ) { + openSection(); + if ( strcmp( $2.data, "data" ) != 0 && + strcmp( $2.data, "init" ) != 0 && + strcmp( $2.data, "exec" ) != 0 && + strcmp( $2.data, "eof" ) != 0 ) + { + error( @2 ) << "unknown write command" << endl; + } + *outStream << " <write what=\"" << $2.data << "\">"; + } + }; + +WriteOptions: + WriteOptions TK_Word { + if ( id->active ) + *outStream << "<option>" << $2.data << "</option>"; + } | + /* Nothing */; + +Access: + KW_Access InlineBlock TK_Semi { + if ( id->active ) + id->pd->accessExpr = $2; + }; + +Variable: + KW_Variable InlineBlock TK_Semi { + if ( id->active ) { + if ( strcmp( $1.data, "curstate" ) == 0 ) { + id->pd->curStateExpr = $2; + } + } + }; + +/* Include statements are processed by both the scanner and the parser. */ +Include: + IncludeKeyword OptSection OptFileName ';' { + if ( id->active ) + doInclude( @1, $2.data, $3.data ); + }; + +IncludeKeyword: + KW_Include { + /* Do this immediately so that the scanner has a correct sense of the + * value in id->active when it reaches the end of the statement before + * the above action executes. 
*/ + //getParseData( @1 ); + }; + +OptSection: TK_Word { $$ = $1; } | { $$.data = 0; $$.length = 0; }; +OptFileName: TK_Literal { $$ = $1; } | { $$.data = 0; $$.length = 0; }; + +/* An assignement statement. Assigns the definition of a machine to a variable name. */ +Assignment: + MachineName '=' Join ';' { + if ( id->active ) { + /* Main machine must be an instance. */ + bool isInstance = false; + if ( strcmp($1.data, machineMain) == 0 ) { + warning(@1) << "main machine will be implicitly instantiated" << endl; + isInstance = true; + } + + /* Generic creation of machine for instantiation and assignment. */ + JoinOrLm *joinOrLm = new JoinOrLm( $3 ); + tryMachineDef( @1, $1.data, joinOrLm, isInstance ); + } + }; + +/* An instantiation statement. Instantiates a machine and assigns it to a + * variable name. */ +Instantiation: + MachineName TK_ColonEquals JoinOrLm ';' { + /* Generic creation of machine for instantiation and assignment. */ + if ( id->active ) + tryMachineDef( @1, $1.data, $3, true ); + }; + +/* Capture the machine name for making the machine's priority name. */ +MachineName: + TK_Word { + if ( id->active ) { + /* Make/get the priority key. The name may have already been referenced + * and therefore exist. */ + PriorDictEl *priorDictEl; + if ( id->pd->priorDict.insert( $1.data, id->pd->nextPriorKey, &priorDictEl ) ) + id->pd->nextPriorKey += 1; + id->pd->curDefPriorKey = priorDictEl->value; + + /* Make/get the local error key. */ + LocalErrDictEl *localErrDictEl; + if ( id->pd->localErrDict.insert( $1.data, id->pd->nextLocalErrKey, &localErrDictEl ) ) + id->pd->nextLocalErrKey += 1; + id->pd->curDefLocalErrKey = localErrDictEl->value; + } + }; + +JoinOrLm: + Join { + $$ = new JoinOrLm( $1 ); + } | + TK_BarStar LmPartList '*' '|' { + /* Create a new factor going to a longest match structure. Record + * in the parse data that we have a longest match. 
*/ + LongestMatch *lm = new LongestMatch( @1, $2 ); + if ( id->active ) + id->pd->lmList.append( lm ); + for ( LmPartList::Iter lmp = *($2); lmp.lte(); lmp++ ) + lmp->longestMatch = lm; + $$ = new JoinOrLm( lm ); + }; + +Join: + Join ',' Expression { + /* Append the expression to the list and return it. */ + $1->exprList.append( $3 ); + $$ = $1; + } | + Expression { + /* Create the expression list with the intial expression. */ + $$ = new Join( InputLoc(@1), $1 ); + }; + +/* Top level production in the parse of a fsm. The lowest precedence + * is the '|' (or), '&' (intersection), and '-' (subtraction) operators. */ +Expression: + Expression '|' Term { + $$ = new Expression( $1, $3, Expression::OrType ); + } %prec EXPR_MINUS | + Expression '&' Term { + $$ = new Expression( $1, $3, Expression::IntersectType ); + } %prec EXPR_MINUS | + Expression '-' Term { + $$ = new Expression( $1, $3, Expression::SubtractType ); + } %prec EXPR_MINUS | + Expression TK_DashDash Term { + $$ = new Expression( $1, $3, Expression::StrongSubtractType ); + } %prec EXPR_MINUS | + Term { + $$ = new Expression( $1 ); + } %prec EXPR_MINUS; + +Term: + Term FactorWithLabel { + $$ = new Term( $1, $2 ); + } | + Term '.' FactorWithLabel { + $$ = new Term( $1, $3 ); + } | + Term TK_ColonGt FactorWithLabel { + $$ = new Term( $1, $3, Term::RightStartType ); + } | + Term TK_ColonGtGt FactorWithLabel { + $$ = new Term( $1, $3, Term::RightFinishType ); + } | + Term TK_LtColon FactorWithLabel { + $$ = new Term( $1, $3, Term::LeftType ); + } | + FactorWithLabel { + $$ = new Term( $1 ); + }; + +FactorWithLabel: + TK_Word ':' FactorWithLabel { + /* Add the label to the list and pass the factor up. */ + $3->labels.prepend( Label(@1, $1.data) ); + $$ = $3; + } | + FactorWithEp; + +FactorWithEp: + FactorWithEp TK_Arrow LocalStateRef { + /* Add the target to the list and return the factor object. 
*/ + $1->epsilonLinks.append( EpsilonLink( InputLoc(@2), id->nameRef ) ); + $$ = $1; + } | + FactorWithAug; + +/* A local state reference. Qualified name witout :: prefix. */ +LocalStateRef: + NoNameSep StateRefNames; + +/* Clear the name ref structure. */ +NoNameSep: + /* Nothing */ { + id->nameRef.empty(); + }; + +/* A qualified state reference. */ +StateRef: + OptNameSep StateRefNames; + +/* Optional leading name separator. */ +OptNameSep: + TK_NameSep { + /* Insert an inition null pointer val to indicate the existence of the + * initial name seperator. */ + id->nameRef.setAs( 0 ); + } | + /* Nothing. */ { + id->nameRef.empty(); + }; + +/* List of names separated by :: */ +StateRefNames: + StateRefNames TK_NameSep TK_Word { + id->nameRef.append( $3.data ); + } | + TK_Word { + id->nameRef.append( $1.data ); + }; + +/* Third group up in precedence. Allow users to embed actions and priorities */ +FactorWithAug: + FactorWithTransAction | + FactorWithPriority | + FactorWithCond | + FactorWithToStateAction | + FactorWithFromStateAction | + FactorWithEOFAction | + FactorWithGblErrorAction | + FactorWithLocalErrorAction | + FactorWithRep { + $$ = new FactorWithAug( $1 ); + }; + +FactorWithTransAction: + FactorWithAug AugTypeBase ActionEmbed { + /* Append the action to the factorWithAug, record the refernce from + * factorWithAug to the action and pass up the factorWithAug. */ + $1->actions.append( ParserAction( @2, $2, 0, $3 ) ); + $$ = $1; + }; + +FactorWithPriority: + FactorWithAug AugTypeBase PriorityAug { + if ( id->active ) { + /* Append the named priority to the factorWithAug and pass it up. */ + $1->priorityAugs.append( PriorityAug( $2, id->pd->curDefPriorKey, $3 ) ); + } + $$ = $1; + } | + FactorWithAug AugTypeBase '(' PriorityName ',' PriorityAug ')' { + /* Append the priority using a default name. 
*/
		$1->priorityAugs.append( PriorityAug( $2, $4, $6 ) );
		$$ = $1;
	};

/* Attach a condition to a factor and pass the factor up. */
FactorWithCond:
	FactorWithAug AugTypeCond ActionEmbed {
		/* Append through $1 explicitly. Reading $$ before it has been
		 * assigned only works when the generated parser happens to
		 * pre-load $$ with $1. */
		$1->conditions.append( ParserAction( @2, $2, 0, $3 ) );
		$$ = $1;
	};

/* Where a condition is embedded. Each operator has a single-token spelling
 * and a two-token spelling using the 'when' keyword; both spellings must
 * produce the same AugType. A bare 'when' is treated like the all form. */
AugTypeCond:
	TK_StartCond { $$ = at_start; } |
	'>' KW_When { $$ = at_start; } |
	TK_AllCond { $$ = at_all; } |
	'$' KW_When { $$ = at_all; } |
	TK_LeavingCond { $$ = at_leave; } |
	/* '%' is the leaving operator, so this must agree with TK_LeavingCond. */
	'%' KW_When { $$ = at_leave; } |
	KW_When { $$ = at_all; };

/* Embed a to-state action. No local error key applies, so 0 is passed. */
FactorWithToStateAction:
	FactorWithAug AugTypeToState ActionEmbed {
		/* Append the action, pass it up. */
		$1->actions.append( ParserAction( @2, $2, 0, $3 ) );
		$$ = $1;
	};

/* Embed a from-state action. */
FactorWithFromStateAction:
	FactorWithAug AugTypeFromState ActionEmbed {
		/* Append the action, pass it up. */
		$1->actions.append( ParserAction( @2, $2, 0, $3 ) );
		$$ = $1;
	};

/* Embed an EOF action. */
FactorWithEOFAction:
	FactorWithAug AugTypeEOF ActionEmbed {
		/* Append the action, pass it up. */
		$1->actions.append( ParserAction( @2, $2, 0, $3 ) );
		$$ = $1;
	};

/* Embed a global error action. The parse data is only consulted when the
 * section is active; the factor is passed up either way. */
FactorWithGblErrorAction:
	FactorWithAug AugTypeGblError ActionEmbed {
		if ( id->active ) {
			/* Append the action to the factorWithAug, record the reference
			 * from factorWithAug to the action and pass up the
			 * factorWithAug. */
			$1->actions.append( ParserAction( @2, $2, id->pd->curDefLocalErrKey, $3 ) );
		}
		$$ = $1;
	};

/* Embed a local error action. The first form uses the local error key of the
 * definition currently being parsed, the second form names the key. */
FactorWithLocalErrorAction:
	FactorWithAug AugTypeLocalError ActionEmbed {
		if ( id->active ) {
			/* Append the action to the factorWithAug, record the reference
			 * from factorWithAug to the action and pass up the
			 * factorWithAug. */
			$1->actions.append( ParserAction( @2, $2, id->pd->curDefLocalErrKey, $3 ) );
		}
		$$ = $1;
	} |
	FactorWithAug AugTypeLocalError '(' LocalErrName ',' ActionEmbed ')' {
		/* Append the action to the factorWithAug, record the reference from
		 * factorWithAug to the action and pass up the factorWithAug. */
		$1->actions.append( ParserAction( @2, $2, $4, $6 ) );
		$$ = $1;
	};

/* A specified priority name.
Looks up the name in the current priority
 * dictionary. */
PriorityName:
	TK_Word {
		/* Give the semantic value a defined result even when the input is
		 * inactive and no lookup is done; the named priority embedding
		 * consumes it without checking id->active. */
		$$ = 0;
		if ( id->active ) {
			// Lookup/create the priority key.
			PriorDictEl *priorDictEl;
			if ( id->pd->priorDict.insert( $1.data, id->pd->nextPriorKey, &priorDictEl ) )
				id->pd->nextPriorKey += 1;

			// Use the inserted/found priority key.
			$$ = priorDictEl->value;
		}
	};

/* A specified local error name. Looks up the name in the current local error
 * dictionary, allocating the next key when the name is new. */
LocalErrName:
	TK_Word {
		/* Defined result for the inactive case, as in PriorityName. */
		$$ = 0;
		if ( id->active ) {
			/* Lookup/create the local error key. */
			LocalErrDictEl *localErrDictEl;
			if ( id->pd->localErrDict.insert( $1.data, id->pd->nextLocalErrKey, &localErrDictEl ) )
				id->pd->nextLocalErrKey += 1;

			/* Use the inserted/found local error key. */
			$$ = localErrDictEl->value;
		}
	};

/* Priority change specs. */
PriorityAug:
	PriorityAugNum {
		// Convert the priority number to a long. Check for overflow. The
		// result must be held in a long: stored in an int it could never
		// compare equal to LONG_MAX/LONG_MIN where long is wider than int,
		// and out-of-range input would go unreported.
		// NOTE(review): a value that fits a long but not the declared type
		// of this rule still narrows silently on assignment; confirm the type.
		errno = 0;
		long aug = strtol( $1.data, 0, 10 );
		if ( errno == ERANGE && aug == LONG_MAX ) {
			// Priority number too large. Recover by setting the priority to 0.
			error(@1) << "priority number " << $1.data << " overflows" << endl;
			$$ = 0;
		}
		else if ( errno == ERANGE && aug == LONG_MIN ) {
			// Priority number too large in the neg. Recover by using 0.
			error(@1) << "priority number " << $1.data << " underflows" << endl;
			$$ = 0;
		}
		else {
			// No overflow or underflow.
			$$ = aug;
		}
	};

/* The text of a priority number: an unsigned integer with an optional sign.
 * A leading '+' is dropped; a leading '-' is prepended to the digits. */
PriorityAugNum:
	TK_UInt |
	'+' TK_UInt {
		$$ = $2;
	} |
	'-' TK_UInt {
		$$.data = "-";
		$$.length = 1;
		$$.append( $2 );
	};

/* Classes of transitions on which to embed actions or change priorities. */
AugTypeBase:
	'@' { $$ = at_finish; } |
	'%' { $$ = at_leave; } |
	'$' { $$ = at_all; } |
	'>' { $$ = at_start; };

/* Global error actions.
*/ +AugTypeGblError: + TK_StartGblError { $$ = at_start_gbl_error; } | + '>' KW_Err { $$ = at_start_gbl_error; } | + + TK_NotStartGblError { $$ = at_not_start_gbl_error; } | + '<' KW_Err { $$ = at_not_start_gbl_error; } | + + TK_AllGblError { $$ = at_all_gbl_error; } | + '$' KW_Err { $$ = at_all_gbl_error; } | + + TK_FinalGblError { $$ = at_final_gbl_error; } | + '%' KW_Err { $$ = at_final_gbl_error; } | + + TK_NotFinalGblError { $$ = at_not_final_gbl_error; } | + '@' KW_Err { $$ = at_not_final_gbl_error; } | + + TK_MiddleGblError { $$ = at_middle_gbl_error; } | + TK_Middle KW_Err { $$ = at_middle_gbl_error; }; + +/* Local error actions. */ +AugTypeLocalError: + TK_StartLocalError { $$ = at_start_local_error; } | + '>' KW_Lerr { $$ = at_start_local_error; } | + + TK_NotStartLocalError { $$ = at_not_start_local_error; } | + '<' KW_Lerr { $$ = at_not_start_local_error; } | + + TK_AllLocalError { $$ = at_all_local_error; } | + '$' KW_Lerr { $$ = at_all_local_error; } | + + TK_FinalLocalError { $$ = at_final_local_error; } | + '%' KW_Lerr { $$ = at_final_local_error; } | + + TK_NotFinalLocalError { $$ = at_not_final_local_error; } | + '@' KW_Lerr { $$ = at_not_final_local_error; } | + + TK_MiddleLocalError { $$ = at_middle_local_error; } | + TK_Middle KW_Lerr { $$ = at_middle_local_error; }; + +/* Eof state actions. */ +AugTypeEOF: + TK_StartEOF { $$ = at_start_eof; } | + '>' KW_Eof { $$ = at_start_eof; } | + + TK_NotStartEOF { $$ = at_not_start_eof; } | + '<' KW_Eof { $$ = at_not_start_eof; } | + + TK_AllEOF { $$ = at_all_eof; } | + '$' KW_Eof { $$ = at_all_eof; } | + + TK_FinalEOF { $$ = at_final_eof; } | + '%' KW_Eof { $$ = at_final_eof; } | + + TK_NotFinalEOF { $$ = at_not_final_eof; } | + '@' KW_Eof { $$ = at_not_final_eof; } | + + TK_MiddleEOF { $$ = at_middle_eof; } | + TK_Middle KW_Eof { $$ = at_middle_eof; }; + +/* To state actions. 
*/ +AugTypeToState: + TK_StartToState { $$ = at_start_to_state; } | + '>' KW_To { $$ = at_start_to_state; } | + + TK_NotStartToState { $$ = at_not_start_to_state; } | + '<' KW_To { $$ = at_not_start_to_state; } | + + TK_AllToState { $$ = at_all_to_state; } | + '$' KW_To { $$ = at_all_to_state; } | + + TK_FinalToState { $$ = at_final_to_state; } | + '%' KW_To { $$ = at_final_to_state; } | + + TK_NotFinalToState { $$ = at_not_final_to_state; } | + '@' KW_To { $$ = at_not_final_to_state; } | + + TK_MiddleToState { $$ = at_middle_to_state; } | + TK_Middle KW_To { $$ = at_middle_to_state; }; + +/* From state actions. */ +AugTypeFromState: + TK_StartFromState { $$ = at_start_from_state; } | + '>' KW_From { $$ = at_start_from_state; } | + + TK_NotStartFromState { $$ = at_not_start_from_state; } | + '<' KW_From { $$ = at_not_start_from_state; } | + + TK_AllFromState { $$ = at_all_from_state; } | + '$' KW_From { $$ = at_all_from_state; } | + + TK_FinalFromState { $$ = at_final_from_state; } | + '%' KW_From { $$ = at_final_from_state; } | + + TK_NotFinalFromState { $$ = at_not_final_from_state; } | + '@' KW_From { $$ = at_not_final_from_state; } | + + TK_MiddleFromState { $$ = at_middle_from_state; } | + TK_Middle KW_From { $$ = at_middle_from_state; }; + + +/* Different ways to embed actions. A TK_Word is reference to an action given by + * the user as a statement in the fsm specification. An action can also be + * specified immediately. */ +ActionEmbed: + ActionEmbedWord | ActionEmbedBlock; + +ActionEmbedWord: + TK_Word { + if ( id->active ) { + /* Set the name in the actionDict. */ + Action *action = id->pd->actionDict.find( $1.data ); + if ( action != 0 ) { + /* Pass up the action element */ + $$ = action; + } + else { + /* Will recover by returning null as the action. 
*/
				error(@1) << "action lookup of \"" << $1.data << "\" failed" << endl;
				$$ = 0;
			}
		}
	};

/* An action given inline as a code block. While the input is active a new
 * Action is created from the block and added to the action list. */
ActionEmbedBlock:
	'{' InlineBlock '}' {
		/* Pass up a null action when inactive so that rules consuming this
		 * value never read an unset semantic value. */
		$$ = 0;
		if ( id->active ) {
			/* Create the action, add it to the list and pass up. */
			Action *newAction = new Action( InputLoc(@1), 0, $2, id->nameRefList );
			id->pd->actionList.append( newAction );
			$$ = newAction;
		}
	};

/* The fourth level of precedence. These are the trailing unary operators that
 * allow for repetition. */
FactorWithRep:
	FactorWithRep '*' {
		$$ = new FactorWithRep( InputLoc(@2), $1, 0, 0,
				FactorWithRep::StarType );
	} |
	FactorWithRep TK_StarStar {
		$$ = new FactorWithRep( InputLoc(@2), $1, 0, 0,
				FactorWithRep::StarStarType );
	} |
	FactorWithRep '?' {
		$$ = new FactorWithRep( InputLoc(@2), $1, 0, 0,
				FactorWithRep::OptionalType );
	} |
	FactorWithRep '+' {
		$$ = new FactorWithRep( InputLoc(@2), $1, 0, 0,
				FactorWithRep::PlusType );
	} |
	/* {n}: the count is passed as the first number. */
	FactorWithRep TK_RepOpOpen FactorRepNum '}' {
		$$ = new FactorWithRep( InputLoc(@2), $1, $3, 0,
				FactorWithRep::ExactType );
	} |
	/* {,n}: the count is passed as the second number. */
	FactorWithRep TK_RepOpOpen ',' FactorRepNum '}' {
		$$ = new FactorWithRep( InputLoc(@2), $1, 0, $4,
				FactorWithRep::MaxType );
	} |
	/* {n,}: the count is passed as the first number. */
	FactorWithRep TK_RepOpOpen FactorRepNum ',' '}' {
		$$ = new FactorWithRep( InputLoc(@2), $1, $3, 0,
				FactorWithRep::MinType );
	} |
	/* {n,m}: both numbers are passed, in order. */
	FactorWithRep TK_RepOpOpen FactorRepNum ',' FactorRepNum '}' {
		$$ = new FactorWithRep( InputLoc(@2), $1, $3, $5,
				FactorWithRep::RangeType );
	} |
	FactorWithNeg {
		$$ = new FactorWithRep( InputLoc(@1), $1 );
	};

/* A repetition count. Out-of-range input is reported and replaced by 1. */
FactorRepNum:
	TK_UInt {
		// Convert the repetition number to a long. Check for overflow. The
		// result must be held in a long: stored in an int it could never
		// compare equal to LONG_MAX where long is wider than int, and the
		// overflow would go unreported.
		errno = 0;
		long rep = strtol( $1.data, 0, 10 );
		if ( errno == ERANGE && rep == LONG_MAX ) {
			// Repetition too large. Recover by returning repetition 1.
			error(@1) << "repetition number " << $1.data << " overflows" << endl;
			$$ = 1;
		}
		else {
			// Cannot be negative, so no underflow.
+ $$ = rep; + } + }; + +/* The fifth level up in precedence. Negation. */ +FactorWithNeg: + '!' FactorWithNeg { + $$ = new FactorWithNeg( InputLoc(@1), $2, FactorWithNeg::NegateType ); + } | + '^' FactorWithNeg { + $$ = new FactorWithNeg( InputLoc(@1), $2, FactorWithNeg::CharNegateType ); + } | + Factor { + $$ = new FactorWithNeg( InputLoc(@1), $1 ); + }; + +/* The highest level in precedence. Atomic machines such as references to other + * machines, literal machines, regular expressions or Expressions in side of + * parenthesis. */ +Factor: + TK_Literal { + // Create a new factor node going to a concat literal. */ + $$ = new Factor( new Literal( InputLoc(@1), $1, Literal::LitString ) ); + } | + TK_CiLiteral { + // Create a new factor node going to a concat literal. */ + $$ = new Factor( new Literal( InputLoc(@1), $1, Literal::LitString ) ); + $$->literal->caseInsensitive = true; + } | + AlphabetNum { + // Create a new factor node going to a literal number. */ + $$ = new Factor( new Literal( InputLoc(@1), $1, Literal::Number ) ); + } | + TK_Word { + if ( id->active ) { + // Find the named graph. + GraphDictEl *gdNode = id->pd->graphDict.find( $1.data ); + if ( gdNode == 0 ) { + // Recover by returning null as the factor node. + error(@1) << "graph lookup of \"" << $1.data << "\" failed" << endl; + $$ = 0; + } + else if ( gdNode->isInstance ) { + // Recover by retuning null as the factor node. + error(@1) << "references to graph instantiations not allowed " + "in expressions" << endl; + $$ = 0; + } + else { + // Create a factor node that is a lookup of an expression. + $$ = new Factor( InputLoc(@1), gdNode->value ); + } + } + } | + RE_SqOpen RegularExprOrData RE_SqClose { + // Create a new factor node going to an OR expression. */ + $$ = new Factor( new ReItem( InputLoc(@1), $2, ReItem::OrBlock ) ); + } | + RE_SqOpenNeg RegularExprOrData RE_SqClose { + // Create a new factor node going to a negated OR expression. 
*/ + $$ = new Factor( new ReItem( InputLoc(@1), $2, ReItem::NegOrBlock ) ); + } | + RE_Slash RegularExpr RE_Slash { + if ( $3.length > 1 ) { + for ( char *p = $3.data; *p != 0; p++ ) { + if ( *p == 'i' ) + $2->caseInsensitive = true; + } + } + + // Create a new factor node going to a regular exp. + $$ = new Factor( $2 ); + } | + RangeLit TK_DotDot RangeLit { + // Create a new factor node going to a range. */ + $$ = new Factor( new Range( $1, $3 ) ); + } | + '(' Join ')' { + /* Create a new factor going to a parenthesized join. */ + $$ = new Factor( $2 ); + }; + +/* Garble up to the closing brace of a parenthesized expression. */ +Factor: '(' error ')' { $$ = 0; yyerrok; }; + +LmPartList: + LmPartList LongestMatchPart { + if ( $2 != 0 ) + $1->append( $2 ); + $$ = $1; + } | + LongestMatchPart { + /* Create a new list with the part. */ + $$ = new LmPartList; + if ( $1 != 0 ) + $$->append( $1 ); + }; + +LongestMatchPart: + ActionSpec { $$ = 0; } | + Assignment { $$ = 0; } | + Join OptLmPartAction ';' { + $$ = 0; + if ( id->active ) { + Action *action = $2; + if ( action != 0 ) + action->isLmAction = true; + $$ = new LongestMatchPart( $1, action, id->pd->nextLongestMatchId++ ); + } + }; + +OptLmPartAction: + TK_DoubleArrow ActionEmbed { $$ = $2; } | + ActionEmbedBlock { $$ = $1; } | + /* Nothing */ { $$ = 0; }; + + +/* Any form of a number that can be used as a basic machine. */ +AlphabetNum: + TK_UInt | + '-' TK_UInt { + $$.data = "-"; + $$.length = 1; + $$.append( $2 ); + } | + TK_Hex; + +InlineBlock: + InlineBlock InlineBlockItem { + /* Append the item to the list, return the list. */ + $1->append( $2 ); + $$ = $1; + } | + /* Empty */ { + /* Start with empty list. */ + $$ = new InlineList; + }; + +/* Items in a struct block. */ +InlineBlockItem: + InlineBlockAny { + /* Add a text segment. */ + $$ = new InlineItem( @1, $1.data, InlineItem::Text ); + } | + InlineBlockSymbol { + /* Add a text segment, need string on heap. 
*/ + $$ = new InlineItem( @1, strdup($1.data), InlineItem::Text ); + } | + InlineBlockInterpret { + /* Pass the inline item up. */ + $$ = $1; + }; + +/* Uninteresting tokens in a struct block. Data allocated by scanner. */ +InlineBlockAny: + IL_WhiteSpace | IL_Comment | IL_Literal | IL_Symbol | + TK_UInt | TK_Hex | TK_Word; + +/* Symbols in a struct block, no data allocated. */ +InlineBlockSymbol: + ',' { $$.data = ","; $$.length = 1; } | + ';' { $$.data = ";"; $$.length = 1; } | + '(' { $$.data = "("; $$.length = 1; } | + ')' { $$.data = ")"; $$.length = 1; } | + '*' { $$.data = "*"; $$.length = 1; } | + TK_NameSep { $$.data = "::"; $$.length = 2; }; + +/* Interpreted statements in a struct block. */ +InlineBlockInterpret: + InlineExprInterpret { + /* Pass up interpreted items of inline expressions. */ + $$ = $1; + } | + KW_Hold SetNoWs ';' SetWs { + $$ = new InlineItem( @1, InlineItem::Hold ); + } | + KW_Exec SetNoWs InlineExpr ';' SetWs { + $$ = new InlineItem( @1, InlineItem::Exec ); + $$->children = $3; + } | + KW_Goto SetNoWs StateRef ';' SetWs { + $$ = new InlineItem( @1, new NameRef(id->nameRef), InlineItem::Goto ); + } | + KW_Goto SetNoWs '*' SetWs InlineExpr ';' { + $$ = new InlineItem( @1, InlineItem::GotoExpr ); + $$->children = $5; + } | + KW_Next SetNoWs StateRef ';' SetWs { + $$ = new InlineItem( @1, new NameRef(id->nameRef), InlineItem::Next ); + } | + KW_Next SetNoWs '*' SetWs InlineExpr ';' { + $$ = new InlineItem( @1, InlineItem::NextExpr ); + $$->children = $5; + } | + KW_Call SetNoWs StateRef ';' SetWs { + $$ = new InlineItem( @1, new NameRef(id->nameRef), InlineItem::Call ); + } | + KW_Call SetNoWs '*' SetWs InlineExpr ';' { + $$ = new InlineItem( @1, InlineItem::CallExpr ); + $$->children = $5; + } | + KW_Ret SetNoWs ';' SetWs { + $$ = new InlineItem( @1, InlineItem::Ret ); + } | + KW_Break SetNoWs ';' SetWs { + $$ = new InlineItem( @1, InlineItem::Break ); + }; + +/* Turn off whitspace collecting when scanning inline blocks. 
*/
SetNoWs: { inlineWhitespace = false; };

/* Turn on whitespace collecting when scanning inline blocks. */
SetWs: { inlineWhitespace = true; };

/* A list of inline expression items. Starts as a new empty InlineList and
 * grows by one item per reduction. */
InlineExpr:
	InlineExpr InlineExprItem {
		$1->append( $2 );
		$$ = $1;
	} |
	/* Empty */ {
		/* Init the list used for this expr. */
		$$ = new InlineList;
	};

/* One item of an inline expression: plain text, a symbol, or an interpreted
 * keyword. */
InlineExprItem:
	InlineExprAny {
		/* Return a text segment. */
		$$ = new InlineItem( @1, $1.data, InlineItem::Text );
	} |
	InlineExprSymbol {
		/* Return a text segment, must heap alloc the text. */
		$$ = new InlineItem( @1, strdup($1.data), InlineItem::Text );
	} |
	InlineExprInterpret {
		/* Pass the inline item up. */
		$$ = $1;
	};

/* Keywords inside inline code that become typed inline items instead of
 * plain text. */
InlineExprInterpret:
	KW_PChar {
		$$ = new InlineItem( @1, InlineItem::PChar );
	} |
	KW_Char {
		$$ = new InlineItem( @1, InlineItem::Char );
	} |
	KW_CurState {
		$$ = new InlineItem( @1, InlineItem::Curs );
	} |
	KW_TargState {
		$$ = new InlineItem( @1, InlineItem::Targs );
	} |
	KW_Entry SetNoWs '(' StateRef ')' SetWs {
		/* StateRef leaves the parsed name in id->nameRef; copy it. */
		$$ = new InlineItem( @1, new NameRef(id->nameRef), InlineItem::Entry );
	};

InlineExprAny:
	IL_WhiteSpace | IL_Comment | IL_Literal | IL_Symbol |
	TK_UInt | TK_Hex | TK_Word;

/* Anything in a ExecValExpr that is not dynamically allocated. This includes
 * all special symbols caught in inline code except the semi. The length field
 * must match the literal text assigned to data. */
InlineExprSymbol:
	'(' { $$.data = "("; $$.length = 1; } |
	')' { $$.data = ")"; $$.length = 1; } |
	'*' { $$.data = "*"; $$.length = 1; } |
	/* "::" is two characters long. */
	TK_NameSep { $$.data = "::"; $$.length = 2; };

/* Parser for regular expression fsms. Any number of expression items which
 * generally gives a machine one character long or one character long starred. */
RegularExpr:
	RegularExpr RegularExprItem {
		// An optimization to lessen the tree size. If a non-starred char is directly
		// under the left side on the right and the right side is another non-starred
		// char then paste them together and return the left side.
Otherwise + // just put the two under a new reg exp node. + if ( $2->type == ReItem::Data && !$2->star && + $1->type == RegExpr::RecurseItem && + $1->item->type == ReItem::Data && !$1->item->star ) + { + // Append the right side to the right side of the left and toss + // the right side. + $1->item->data.append( $2->data ); + delete $2; + $$ = $1; + } + else { + $$ = new RegExpr( $1, $2 ); + } + } | + /* Nothing */ { + // Can't optimize the tree. + $$ = new RegExpr(); + }; + +/* RegularExprItems can be a character spec with an optional staring of the char. */ +RegularExprItem: + RegularExprChar RE_Star { + $1->star = true; + $$ = $1; + } | + RegularExprChar { + $$ = $1; + }; + +/* A character spec can be a set of characters inside of square parenthesis, + * a dot specifying any character or some explicitly stated character. */ +RegularExprChar: + RE_SqOpen RegularExprOrData RE_SqClose { + $$ = new ReItem( InputLoc(@1), $2, ReItem::OrBlock ); + } | + RE_SqOpenNeg RegularExprOrData RE_SqClose { + $$ = new ReItem( InputLoc(@1), $2, ReItem::NegOrBlock ); + } | + RE_Dot { + $$ = new ReItem( InputLoc(@1), ReItem::Dot ); + } | + RE_Char { + $$ = new ReItem( InputLoc(@1), $1.data[0] ); + }; + +/* The data inside of a [] expression in a regular expression. Accepts any + * number of characters or ranges. */ +RegularExprOrData: + RegularExprOrData RegularExprOrChar { + // An optimization to lessen the tree size. If an or char is directly + // under the left side on the right and the right side is another or + // char then paste them together and return the left side. Otherwise + // just put the two under a new or data node. + if ( $2->type == ReOrItem::Data && + $1->type == ReOrBlock::RecurseItem && + $1->item->type == ReOrItem::Data ) + { + // Append the right side to right side of the left and toss + // the right side. + $1->item->data.append( $2->data ); + delete $2; + $$ = $1; + } + else { + // Can't optimize, put the left and right under a new node. 
+ $$ = new ReOrBlock( $1, $2 ); + } + } | + /* Nothing */ { + $$ = new ReOrBlock(); + }; + + +/* A single character inside of an or expression. Can either be a character + * or a set of characters. */ +RegularExprOrChar: + RE_Char { + $$ = new ReOrItem( InputLoc(@1), $1.data[0] ); + } | + RE_Char RE_Dash RE_Char { + $$ = new ReOrItem( InputLoc(@2), $1.data[0], $3.data[0] ); + }; + +RangeLit: + TK_Literal { + // Range literas must have only one char. + if ( strlen($1.data) != 1 ) { + // Recover by using the literal anyways. + error(@1) << "literal used in range must be of length 1" << endl; + } + $$ = new Literal( InputLoc(@1), $1, Literal::LitString ); + } | + AlphabetNum { + // Create a new literal number. + $$ = new Literal( InputLoc(@1), $1, Literal::Number ); + }; + +%% + +/* Try to do a definition, common to assignment and instantiation. Warns about + * instances other than main not being implemented yet. */ +void tryMachineDef( const YYLTYPE &loc, char *name, JoinOrLm *joinOrLm, bool isInstance ) +{ + GraphDictEl *newEl = id->pd->graphDict.insert( name ); + if ( newEl != 0 ) { + /* New element in the dict, all good. */ + newEl->value = new VarDef( name, joinOrLm ); + newEl->isInstance = isInstance; + newEl->loc = loc; + + /* It it is an instance, put on the instance list. */ + if ( isInstance ) + id->pd->instanceList.append( newEl ); + } + else { + // Recover by ignoring the duplicate. + error(loc) << "fsm \"" << name << "\" previously defined" << endl; + } +} + +void doInclude( const InputLoc &loc, char *sectionName, char *inputFile ) +{ + /* Bail if we hit the max include depth. */ + if ( includeDepth == INCLUDE_STACK_SIZE ) { + error(loc) << "hit maximum include depth of " << INCLUDE_STACK_SIZE << endl; + } + else { + char *includeTo = id->pd->fsmName; + + /* Implement defaults for the input file and section name. 
*/ + if ( inputFile == 0 ) + inputFile = id->fileName; + if ( sectionName == 0 ) + sectionName = id->pd->fsmName; + + /* Parse the included file. */ + InputData *oldId = id; + id = new InputData( inputFile, sectionName, includeTo ); + includeDepth += 1; + yyparse(); + includeDepth -= 1; + delete id; + id = oldId; + } +} + +void openSection() +{ + if ( ! sectionOpened ) { + sectionOpened = true; + *outStream << "<ragel_def name=\"" << id->pd->fsmName << "\">\n"; + } +} + +void yyerror( char *err ) +{ + /* Bison won't give us the location, but in the last call to the scanner we + * saved a pointer to the location variable. Use that. instead. */ + error(::yylloc->first_line, ::yylloc->first_column) << err << endl; +} diff --git a/ragel/rlscan.lex b/ragel/rlscan.lex new file mode 100644 index 0000000..8116c92 --- /dev/null +++ b/ragel/rlscan.lex @@ -0,0 +1,1212 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +%{ + +#define YY_NEVER_INTERACTIVE 1 +//#define WANT_TOKEN_WRITE + +#include <iostream> +#include "ragel.h" +#include "rlparse.h" +#include "parsedata.h" +#include "buffer.h" + +using std::cout; +using std::cerr; +using std::endl; + +Buffer tokbuf; +int builtinBrace = 0; +bool inlineWhitespace = true; +bool handlingInclude = false; +bool multiline = false; + +/* Used for recognising host language code blocks, init with anything not + * involved in the host lang test. */ +int previous_tokens[2] = { TK_Section, TK_Section }; + +/* These keep track of the start of an inline comment or literal string for + * reporting unterminated comments or strings. */ +int il_comm_lit_first_line; +int il_comm_lit_first_column; + +/* These keep track of the start of a code block for reporting unterminated + * code blocks. */ +int il_code_first_line; +int il_code_first_column; + +/* Include Stack data. */ +YY_BUFFER_STATE buff_stack[INCLUDE_STACK_SIZE]; +bool multiline_stack[INCLUDE_STACK_SIZE]; +int inc_stack_ptr = 0; + +YYSTYPE *yylval; +YYLTYPE *yylloc; + +extern InputData *id; +extern int includeDepth; + +void garble(); + +void extendToken( char *data, int len ); +void extendToken(); + +int emitToken( int token, char *data, int len ); +int emitNoData( int token ); +void passThrough( char *data ); +bool openMachineSpecBlock(); +void popInclude(); + +enum InlineBlockType { + CurlyDelimited, + SemiTerminated +} inlineBlockType; + +/* Using a wrapper for the parser, must the lex declaration. */ +#define YY_DECL int ragel_lex() + +%} + +/* Outside an fsm machine specification ("outside code"). */ +%x OC_SGL_LIT +%x OC_DBL_LIT +%x OC_C_COM +%x OC_CXX_COM + +/* Inside a fsm machine specification. 
*/ +%x RL_INITIAL +%x RL_SLIT +%x RL_DLIT +%x RL_OREXP +%x RL_REGEXP +%x RL_REGEXP_OR +%x RL_SHELL_COM +%x RL_VERBOSE_EMBED +%x RL_WRITE + +/* Inline code. */ +%x IL_INITIAL +%x IL_SGL_LIT +%x IL_DBL_LIT +%x IL_C_COM +%x IL_CXX_COM + +WSCHAR [\t\n\v\f\r ] +IDENT [a-zA-Z_][a-zA-Z_0-9]* + +%% + + /* Numbers in outter code. */ +<INITIAL>[0-9]+ { + garble(); + passThrough( yytext ); +} + + /* Words in outter code. */ +<INITIAL>{IDENT} { + garble(); + passThrough( yytext ); +} + + /* Begin a c style comment. */ +<INITIAL>"/*" { + BEGIN(OC_C_COM); + extendToken(); + passThrough( yytext ); +} + /* Data in a C style comment. */ +<OC_C_COM>. extendToken(); passThrough( yytext ); +<OC_C_COM>\n extendToken(); passThrough( yytext ); + + /* Terminate a C style comment. */ +<OC_C_COM>"*/" { + BEGIN(INITIAL); + garble(); + passThrough( yytext ); +} + + /* Begin a C++ style comment. */ +<INITIAL>"//" { + BEGIN(OC_CXX_COM); + extendToken(); + passThrough( yytext ); +} + /* Data in a C++ style comment. */ +<OC_CXX_COM>[^\n]+ { + extendToken(); + passThrough( yytext ); +} + /* Terminate a C++ style comment. */ +<OC_CXX_COM>\n { + BEGIN(INITIAL); + garble(); + passThrough( yytext ); +} + + + /* Start literals. */ +<INITIAL>\' { + BEGIN(OC_SGL_LIT); + extendToken(); + passThrough( yytext ); +} +<INITIAL>\" { + BEGIN(OC_DBL_LIT); + extendToken(); + passThrough( yytext ); +} + /* Various escape sequences in literals. We don't need to get them + * all here. We just need to pick off the ones that could confuse us + * about the literal we are matchine */ +<OC_SGL_LIT,OC_DBL_LIT>\\\' extendToken(); passThrough( yytext ); +<OC_SGL_LIT,OC_DBL_LIT>\\\" extendToken(); passThrough( yytext ); +<OC_SGL_LIT,OC_DBL_LIT>\\\\ extendToken(); passThrough( yytext ); + /* Characters in literals. 
*/ +<OC_DBL_LIT>[^\"] extendToken(); passThrough( yytext ); +<OC_SGL_LIT>[^\'] extendToken(); passThrough( yytext ); + /* Terminate a double literal */ +<OC_DBL_LIT>\" { + BEGIN(INITIAL); + garble(); + passThrough( yytext ); +} + /* Terminate a single literal. */ +<OC_SGL_LIT>\' { + BEGIN(INITIAL); + garble(); + passThrough( yytext ); +} + + /* Whitespace. */ +<INITIAL>{WSCHAR}+ { + garble(); + passThrough( yytext ); +} + + /* Section Deliminator */ +<INITIAL>"%%" { + BEGIN(RL_INITIAL); + multiline = false; + return emitNoData( TK_Section ); +} + + /* Section Deliminator */ +<INITIAL>"%%{" { + BEGIN(RL_INITIAL); + multiline = true; + return emitNoData( TK_Section ); +} + +<INITIAL>"{" { + garble(); + passThrough( yytext ); +} + +<INITIAL>"}" { + garble(); + passThrough( yytext ); +} + +<INITIAL>";" { + garble(); + passThrough( yytext ); +} + + /* Any other characters. */ +<INITIAL>. { + garble(); + passThrough( yytext ); +} + + /* Numbers. */ +<RL_INITIAL,IL_INITIAL>[0-9][0-9]* { + return emitToken( TK_UInt, yytext, yyleng ); +} +<RL_INITIAL,IL_INITIAL>0x[0-9a-fA-F][0-9a-fA-F]* { + return emitToken( TK_Hex, yytext, yyleng ); +} + + /* Keywords in RL and IL. 
*/ +<RL_INITIAL>variable\ [a-zA-Z_]+ { + BEGIN(IL_INITIAL); + inlineBlockType = SemiTerminated; + return emitToken( KW_Variable, yytext+9, yyleng-9 ); +} +<RL_INITIAL>access { + BEGIN(IL_INITIAL); + inlineBlockType = SemiTerminated; + return emitNoData( KW_Access ); +} +<RL_INITIAL>action { + return emitNoData( KW_Action ); +} +<RL_INITIAL>alphtype { + BEGIN(IL_INITIAL); + inlineWhitespace = false; + inlineBlockType = SemiTerminated; + return emitNoData( KW_AlphType ); +} +<RL_INITIAL>getkey { + BEGIN(IL_INITIAL); + inlineBlockType = SemiTerminated; + return emitNoData( KW_GetKey ); +} +<RL_INITIAL>when { + return emitNoData( KW_When ); +} +<RL_INITIAL>eof { + return emitNoData( KW_Eof ); +} +<RL_INITIAL>err { + return emitNoData( KW_Err ); +} +<RL_INITIAL>lerr { + return emitNoData( KW_Lerr ); +} +<RL_INITIAL>to { + return emitNoData( KW_To ); +} +<RL_INITIAL>from { + return emitNoData( KW_From ); +} + + + /* +<RL_INITIAL>range { + return emitNoData( KW_Range ); +}*/ + +<RL_INITIAL>write { + BEGIN(RL_WRITE); + return emitNoData( KW_Write ); +} +<RL_INITIAL>machine { + return emitNoData( KW_Machine ); +} +<RL_INITIAL>include { + /* Include tokens statments are processed by both the scanner and the + * parser. The scanner opens the include file and switches to it and the + * parser invokes a new parser for handling the tokens. We use + * handlingInclude to indicate that the scanner is processing an include + * directive. 
Ends at ; */ + handlingInclude = true; + return emitNoData( KW_Include ); +} + +<RL_WRITE>{WSCHAR}+ garble(); +<RL_WRITE>; { + BEGIN(RL_INITIAL); + return emitNoData( ';' ); +} + + /* These must be synced in rlparse.y */ +<IL_INITIAL>fpc { + return emitNoData( KW_PChar ); +} +<IL_INITIAL>fc { + return emitNoData( KW_Char ); +} +<IL_INITIAL>fhold { + return emitNoData( KW_Hold ); +} +<IL_INITIAL>fgoto { + return emitNoData( KW_Goto ); +} +<IL_INITIAL>fcall { + return emitNoData( KW_Call ); +} +<IL_INITIAL>fret { + return emitNoData( KW_Ret ); +} +<IL_INITIAL>fcurs { + return emitNoData( KW_CurState ); +} +<IL_INITIAL>ftargs { + return emitNoData( KW_TargState ); +} +<IL_INITIAL>fentry { + return emitNoData( KW_Entry ); +} +<IL_INITIAL>fnext { + return emitNoData( KW_Next ); +} +<IL_INITIAL>fexec { + return emitNoData( KW_Exec ); +} +<IL_INITIAL>fbreak { + return emitNoData( KW_Break ); +} + + /* Words. */ +<RL_INITIAL,IL_INITIAL,RL_WRITE>{IDENT} { + return emitToken( TK_Word, yytext, yyleng ); +} + + /* Begin a shell style comment. */ +<RL_INITIAL># { + BEGIN(RL_SHELL_COM); + extendToken(); +} + /* Data in a shell style comment. */ +<RL_SHELL_COM>[^\n]+ { + extendToken(); +} + /* Terminate a C++ style comment. */ +<RL_SHELL_COM>\n { + BEGIN(RL_INITIAL); + garble(); +} + + /* + * Start single and double literals. + */ +<RL_INITIAL>' { + BEGIN(RL_SLIT); + extendToken(); +} +<RL_INITIAL>\" { + BEGIN(RL_DLIT); + extendToken(); +} + + /* Escape sequences in single and double literals. */ +<RL_SLIT,RL_DLIT>\\0 extendToken( "\0", 1 ); +<RL_SLIT,RL_DLIT>\\a extendToken( "\a", 1 ); +<RL_SLIT,RL_DLIT>\\b extendToken( "\b", 1 ); +<RL_SLIT,RL_DLIT>\\t extendToken( "\t", 1 ); +<RL_SLIT,RL_DLIT>\\n extendToken( "\n", 1 ); +<RL_SLIT,RL_DLIT>\\v extendToken( "\v", 1 ); +<RL_SLIT,RL_DLIT>\\f extendToken( "\f", 1 ); +<RL_SLIT,RL_DLIT>\\r extendToken( "\r", 1 ); +<RL_SLIT,RL_DLIT>\\\n extendToken(); +<RL_SLIT,RL_DLIT>\\. extendToken( yytext+1, 1 ); + + /* Characters in literals. 
*/ +<RL_SLIT>[^'] extendToken( yytext, 1 ); +<RL_DLIT>[^"] extendToken( yytext, 1 ); + + /* Terminate a single literal. */ +<RL_SLIT>'[i]* { + BEGIN(RL_INITIAL); + return emitToken( yytext[1] == 'i' ? TK_CiLiteral : TK_Literal, 0, 0 ); +} + /* Terminate a double literal */ +<RL_DLIT>\"[i]* { + BEGIN(RL_INITIAL); + return emitToken( yytext[1] == 'i' ? TK_CiLiteral : TK_Literal, 0, 0 ); +} + + /* + * Start an OR expression. + */ +<RL_INITIAL>"[" { + BEGIN(RL_OREXP); + return emitNoData( RE_SqOpen ); +} + +<RL_INITIAL>"\[^" { + BEGIN(RL_OREXP); + return emitNoData( RE_SqOpenNeg ); +} + + /* Escape sequences in OR expressions. */ +<RL_OREXP>\\0 { return emitToken( RE_Char, "\0", 1 ); } +<RL_OREXP>\\a { return emitToken( RE_Char, "\a", 1 ); } +<RL_OREXP>\\b { return emitToken( RE_Char, "\b", 1 ); } +<RL_OREXP>\\t { return emitToken( RE_Char, "\t", 1 ); } +<RL_OREXP>\\n { return emitToken( RE_Char, "\n", 1 ); } +<RL_OREXP>\\v { return emitToken( RE_Char, "\v", 1 ); } +<RL_OREXP>\\f { return emitToken( RE_Char, "\f", 1 ); } +<RL_OREXP>\\r { return emitToken( RE_Char, "\r", 1 ); } +<RL_OREXP>\\\n { garble(); } +<RL_OREXP>\\. { return emitToken( RE_Char, yytext+1, 1 ); } + + /* Range dash in an OR expression. */ +<RL_OREXP>- { + return emitNoData( RE_Dash ); +} + + /* Characters in an OR expression. */ +<RL_OREXP>[^\]] { + return emitToken( RE_Char, yytext, 1 ); +} + + /* Terminate an OR expression. */ +<RL_OREXP>\] { + BEGIN(RL_INITIAL); + return emitNoData( RE_SqClose ); +} + + /* + * Start a regular expression. + */ +<RL_INITIAL>\/ { + BEGIN(RL_REGEXP); + return emitNoData( RE_Slash ); +} + + /* Escape sequences in regular expressions. 
*/ +<RL_REGEXP,RL_REGEXP_OR>\\0 { + return emitToken( RE_Char, "\0", 1 ); +} +<RL_REGEXP,RL_REGEXP_OR>\\a { + return emitToken( RE_Char, "\a", 1 ); +} +<RL_REGEXP,RL_REGEXP_OR>\\b { + return emitToken( RE_Char, "\b", 1 ); +} +<RL_REGEXP,RL_REGEXP_OR>\\t { + return emitToken( RE_Char, "\t", 1 ); +} +<RL_REGEXP,RL_REGEXP_OR>\\n { + return emitToken( RE_Char, "\n", 1 ); +} +<RL_REGEXP,RL_REGEXP_OR>\\v { + return emitToken( RE_Char, "\v", 1 ); +} +<RL_REGEXP,RL_REGEXP_OR>\\f { + return emitToken( RE_Char, "\f", 1 ); +} +<RL_REGEXP,RL_REGEXP_OR>\\r { + return emitToken( RE_Char, "\r", 1 ); +} +<RL_REGEXP,RL_REGEXP_OR>\\\n { + garble(); +} +<RL_REGEXP,RL_REGEXP_OR>\\. { + return emitToken( RE_Char, yytext+1, 1 ); +} + + /* Special characters in a regular expression. */ +<RL_REGEXP>\. { + return emitNoData( RE_Dot ); +} +<RL_REGEXP>\* { + return emitNoData( RE_Star ); +} +<RL_REGEXP>"\[^" { + BEGIN(RL_REGEXP_OR); + return emitNoData( RE_SqOpenNeg ); +} +<RL_REGEXP>"\[" { + BEGIN(RL_REGEXP_OR); + return emitNoData( RE_SqOpen ); +} + + /* Range dash in a regular expression or set. */ +<RL_REGEXP_OR>- { + return emitNoData( RE_Dash ); +} + + /* Terminate an or set or a regular expression. */ +<RL_REGEXP_OR>\] { + BEGIN(RL_REGEXP); + return emitNoData( RE_SqClose ); +} + + /* Characters in a regular expression. */ +<RL_REGEXP,RL_REGEXP_OR>[^/] { + return emitToken( RE_Char, yytext, 1 ); +} + + /* Terminate a regular expression */ +<RL_REGEXP,RL_REGEXP_OR>\/[i]* { + BEGIN(RL_INITIAL); + return emitToken( RE_Slash, yytext, yyleng ); +} + + /* Builtin code move to Builtin initial. */ +<RL_INITIAL>"{" { + if ( openMachineSpecBlock() ) { + /* Plain bracket. */ + return emitNoData( *yytext ); + } + else { + /* Start an inline code block. Keep track of where it started in case + * it terminates prematurely. Return the open bracket. 
*/ + BEGIN(IL_INITIAL); + inlineBlockType = CurlyDelimited; + il_code_first_line = id->last_line; + il_code_first_column = id->last_column+1; + builtinBrace++; + return emitNoData( *yytext ); + } +} + +<RL_INITIAL>\.\. { + return emitNoData( TK_DotDot ); +} + +<RL_INITIAL>:> { + return emitNoData( TK_ColonGt ); +} + +<RL_INITIAL>:>> { + return emitNoData( TK_ColonGtGt ); +} + +<RL_INITIAL><: { + return emitNoData( TK_LtColon ); +} + +<RL_INITIAL>-- { + return emitNoData( TK_DashDash ); +} + + /* The instantiation operator. */ +<RL_INITIAL>:= { + return emitNoData( TK_ColonEquals ); +} + + /* Error actions. */ +<RL_INITIAL>\>\! { + return emitNoData( TK_StartGblError ); +} +<RL_INITIAL>\$\! { + return emitNoData( TK_AllGblError ); +} +<RL_INITIAL>%\! { + return emitNoData( TK_FinalGblError ); +} +<RL_INITIAL><\! { + return emitNoData( TK_NotStartGblError ); +} +<RL_INITIAL>@\! { + return emitNoData( TK_NotFinalGblError ); +} +<RL_INITIAL><>\! { + return emitNoData( TK_MiddleGblError ); +} + + /* Local error actions. */ +<RL_INITIAL>\>\^ { + return emitNoData( TK_StartLocalError ); +} +<RL_INITIAL>\$\^ { + return emitNoData( TK_AllLocalError ); +} +<RL_INITIAL>%\^ { + return emitNoData( TK_FinalLocalError ); +} +<RL_INITIAL><\^ { + return emitNoData( TK_NotStartLocalError ); +} +<RL_INITIAL>@\^ { + return emitNoData( TK_NotFinalLocalError ); +} +<RL_INITIAL><>\^ { + return emitNoData( TK_MiddleLocalError ); +} + + /* EOF Actions. */ +<RL_INITIAL>\>\/ { + return emitNoData( TK_StartEOF ); +} +<RL_INITIAL>\$\/ { + return emitNoData( TK_AllEOF ); +} +<RL_INITIAL>%\/ { + return emitNoData( TK_FinalEOF ); +} +<RL_INITIAL><\/ { + return emitNoData( TK_NotStartEOF ); +} +<RL_INITIAL>@\/ { + return emitNoData( TK_NotFinalEOF ); +} +<RL_INITIAL><>\/ { + return emitNoData( TK_MiddleEOF ); +} + + /* To State Actions. 
*/ +<RL_INITIAL>\>~ { + return emitNoData( TK_StartToState ); +} +<RL_INITIAL>\$~ { + return emitNoData( TK_AllToState ); +} +<RL_INITIAL>%~ { + return emitNoData( TK_FinalToState ); +} +<RL_INITIAL><~ { + return emitNoData( TK_NotStartToState ); +} +<RL_INITIAL>@~ { + return emitNoData( TK_NotFinalToState ); +} +<RL_INITIAL><>~ { + return emitNoData( TK_MiddleToState ); +} + + /* From State Actions. */ +<RL_INITIAL>\>\* { + return emitNoData( TK_StartFromState ); +} +<RL_INITIAL>\$\* { + return emitNoData( TK_AllFromState ); +} +<RL_INITIAL>%\* { + return emitNoData( TK_FinalFromState ); +} +<RL_INITIAL><\* { + return emitNoData( TK_NotStartFromState ); +} +<RL_INITIAL>@\* { + return emitNoData( TK_NotFinalFromState ); +} +<RL_INITIAL><>\* { + return emitNoData( TK_MiddleFromState ); +} + +<RL_INITIAL><> { + return emitNoData( TK_Middle ); +} + +<RL_INITIAL>\>\? { + return emitNoData( TK_StartCond ); +} +<RL_INITIAL>\$\? { + return emitNoData( TK_AllCond ); +} +<RL_INITIAL>%\? { + return emitNoData( TK_LeavingCond ); +} + + /* The Arrow operator. */ +<RL_INITIAL>-> { + return emitNoData( TK_Arrow ); +} + + /* The double arrow operator. */ +<RL_INITIAL>=> { + return emitNoData( TK_DoubleArrow ); +} + + /* Double star (longest match kleene star). */ +<RL_INITIAL>\*\* { + return emitNoData( TK_StarStar ); +} + + /* Name separator. */ +<RL_INITIAL>:: { + return emitNoData( TK_NameSep ); +} + + /* Opening of longest match. */ +<RL_INITIAL>\|\* { + return emitNoData( TK_BarStar ); +} + + /* Catch the repetition operator now to free up the parser. Once caught, + * Send only the opening brace and rescan the rest so it can be broken + * up for the parser. */ +<RL_INITIAL>\{([0-9]+(,[0-9]*)?|,[0-9]+)\} { + yyless(1); + return emitNoData( TK_RepOpOpen ); +} + + /* Section Deliminator */ +<RL_INITIAL>"}%%" { + BEGIN(INITIAL); + return emitNoData( TK_Section ); +} + + /* Whitespace. 
*/ +<RL_INITIAL>[\t\v\f\r ] garble(); +<RL_INITIAL>\n { + if ( multiline ) + garble(); + else { + BEGIN(INITIAL); + return emitNoData( TK_SectionNL ); + } +} + + /* Any other characters. */ +<RL_INITIAL>. { + return emitNoData( *yytext ); +} + + /* End of input in a literal is an error. */ +<RL_SLIT,RL_DLIT><<EOF>> { + error(id->first_line, id->first_column) << "unterminated literal" << endl; + exit(1); +} + + /* End of input in a comment is an error. */ +<RL_SHELL_COM><<EOF>> { + error(id->first_line, id->first_column) << "unterminated comment" << endl; + exit(1); +} + + /* Begin a C style comment. */ +<IL_INITIAL>"/*" { + BEGIN(IL_C_COM); + il_comm_lit_first_line = id->last_line; + il_comm_lit_first_column = id->last_column+1; + extendToken( yytext, yyleng ); +} + /* Data in a C style comment. */ +<IL_C_COM>\n extendToken( yytext, 1 ); +<IL_C_COM>. extendToken( yytext, 1 ); + + /* Terminate a C style comment. */ +<IL_C_COM>"*/" { + BEGIN(IL_INITIAL); + return emitToken( IL_Comment, yytext, 2 ); +} + + /* Begin a C++ style comment. */ +<IL_INITIAL>"//" { + BEGIN(IL_CXX_COM); + il_comm_lit_first_line = id->last_line; + il_comm_lit_first_column = id->last_column+1; + extendToken( yytext, yyleng ); +} + /* Data in a C++ style comment. */ +<IL_CXX_COM>[^\n]+ { + extendToken( yytext, yyleng ); +} + /* Terminate a C++ style comment. */ +<IL_CXX_COM>\n { + BEGIN(IL_INITIAL); + return emitToken( IL_Comment, yytext, 1 ); +} + + + /* Start literals. */ +<IL_INITIAL>' { + BEGIN(IL_SGL_LIT); + il_comm_lit_first_line = id->last_line; + il_comm_lit_first_column = id->last_column+1; + extendToken( yytext, 1 ); +} +<IL_INITIAL>\" { + BEGIN(IL_DBL_LIT); + il_comm_lit_first_line = id->last_line; + il_comm_lit_first_column = id->last_column+1; + extendToken( yytext, 1 ); +} + /* Various escape sequences in literals. We don't need to get them + * all here. 
We just need to pick off the ones that could confuse us + * about the literal we are matching */ +<IL_SGL_LIT,IL_DBL_LIT>\\' extendToken( yytext, yyleng ); +<IL_SGL_LIT,IL_DBL_LIT>\\\" extendToken( yytext, yyleng ); +<IL_SGL_LIT,IL_DBL_LIT>\\\\ extendToken( yytext, yyleng ); + /* Characters in literals. */ +<IL_DBL_LIT>[^\"] extendToken( yytext, 1 ); +<IL_SGL_LIT>[^'] extendToken( yytext, 1 ); + + /* Terminate a double literal */ +<IL_DBL_LIT>\" { + BEGIN(IL_INITIAL); + return emitToken( IL_Literal, yytext, 1 ); +} + /* Terminate a single literal. */ +<IL_SGL_LIT>' { + BEGIN(IL_INITIAL); + return emitToken( IL_Literal, yytext, 1 ); +} + + /* Open Brace, increment count of open braces. */ +<IL_INITIAL>"{" { + builtinBrace++; + return emitToken( IL_Symbol, yytext, 1 ); +} + + /* Close brace, decrement count of open braces. */ +<IL_INITIAL>"}" { + builtinBrace--; + if ( inlineBlockType == CurlyDelimited && builtinBrace == 0 ) { + /* Inline code block ends. */ + BEGIN(RL_INITIAL); + inlineWhitespace = true; + return emitNoData( *yytext ); + } + else { + /* Either a semi terminated inline block or only the closing brace of + * some inner scope, not the block's closing brace. */ + return emitToken( IL_Symbol, yytext, 1 ); + } +} + + /* May need to terminate the inline block. */ +<IL_INITIAL>; { + if ( inlineBlockType == SemiTerminated ) { + /* Inline code block ends. */ + BEGIN(RL_INITIAL); + inlineWhitespace = true; + return emitNoData( TK_Semi ); + } + else { + /* Not ending. The semi is sent as a token, not a generic symbol. */ + return emitNoData( *yytext ); + } +} + + /* Catch some symbols so they can be + * sent as tokens instead as generic symbols. */ +<IL_INITIAL>[*()] { + return emitNoData( *yytext ); +} +<IL_INITIAL>:: { + return emitNoData( TK_NameSep ); +} + + /* Whitespace. */ +<IL_INITIAL>{WSCHAR}+ { + if ( inlineWhitespace ) + return emitToken( IL_WhiteSpace, yytext, yyleng ); +} + + /* Any other characters. */ +<IL_INITIAL>. 
{ + return emitToken( IL_Symbol, yytext, 1 ); +} + +<INITIAL><<EOF>> { + /* If we are not at the bottom of the include stack, then pop the current + * file that we are scanning. Since we are always returning 0 to the parser + * it will exit and return to the parser that called it. */ + if ( inc_stack_ptr > 0 ) + popInclude(); + return 0; +} + + /* End of input in a literal is an error. */ +<IL_SGL_LIT,IL_DBL_LIT><<EOF>> { + error(il_comm_lit_first_line, il_comm_lit_first_column) << + "unterminated literal" << endl; + exit(1); +} + + /* End of input in a comment is an error. */ +<IL_C_COM,IL_CXX_COM><<EOF>> { + error(il_comm_lit_first_line, il_comm_lit_first_column) << + "unterminated comment" << endl; + exit(1); +} + + /* End of input in a code block. */ +<IL_INITIAL><<EOF>> { + error(il_code_first_line, il_code_first_column) << + "unterminated code block" << endl; + exit(1); +} + +%% + +/* Write out token data, escaping special characters. */ +#ifdef WANT_TOKEN_WRITE +void writeToken( int token, char *data ) +{ + cout << "token id " << token << " at " << id->fileName << ":" << + yylloc->first_line << ":" << yylloc->first_column << "-" << + yylloc->last_line << ":" << yylloc->last_column << " "; + + if ( data != 0 ) { + while ( *data != 0 ) { + switch ( *data ) { + case '\n': cout << "\\n"; break; + case '\t': cout << "\\t"; break; + default: cout << *data; break; + } + data += 1; + } + } + cout << endl; +} +#endif + +/* Calculate line info from yytext. Called on every pattern match. */ +void updateLineInfo() +{ + /* yytext should always have at least one char. */ + assert( yytext[0] != 0 ); + + /* Scan through yytext up to the last character. */ + char *p = yytext; + for ( ; p[1] != 0; p++ ) { + if ( p[0] == '\n' ) { + id->last_line += 1; + id->last_column = 0; + } + else { + id->last_column += 1; + } + } + + /* Always consider the last character as not a newline. Newlines at the + * end of a token are as any old character at the end of the line.
*/ + id->last_column += 1; + + /* The caller may be about to emit a token, be prepared to pass the line + * info to the parser. */ + yylloc->first_line = id->first_line; + yylloc->first_column = id->first_column; + yylloc->last_line = id->last_line; + yylloc->last_column = id->last_column; + + /* If the last character was indeed a newline, then wrap ahead now. */ + if ( p[0] == '\n' ) { + id->last_line += 1; + id->last_column = 0; + } +} + +/* Eat up a matched pattern that will not be part of a token. */ +void garble() +{ + /* Update line information from yytext. */ + updateLineInfo(); + + /* The next token starts ahead of the last token. */ + id->first_line = id->last_line; + id->first_column = id->last_column + 1; +} + +/* Append data to the end of the token. More token data expected. */ +void extendToken( char *data, int len ) +{ + if ( data != 0 && len > 0 ) + tokbuf.append( data, len ); + + /* Update line information from yytext. */ + updateLineInfo(); +} + +/* Extend, but with no data, more data to come. */ +void extendToken() +{ + /* Update line information from yytext. */ + updateLineInfo(); +} + + +/* Possibly process include data. */ +void processInclude( int token ) +{ + static char *incFileName = 0; + + if ( handlingInclude ) { + if ( token == KW_Include ) + incFileName = 0; + else if ( token == TK_Literal ) + incFileName = yylval->data.data; + else if ( token == ';' ) { + /* Terminate the include statement. Start reading from included file. */ + handlingInclude = false; + + if ( id->active && includeDepth < INCLUDE_STACK_SIZE ) { + /* If there is no section name or input file, default to the current values. */ + if ( incFileName == 0 ) + incFileName = id->fileName; + + /* Make the new buffer and switch to it.
*/ + FILE *incFile = fopen( incFileName, "rt" ); + if ( incFile != 0 ) { + buff_stack[inc_stack_ptr] = YY_CURRENT_BUFFER; + multiline_stack[inc_stack_ptr] = multiline; + inc_stack_ptr += 1; + yy_switch_to_buffer( yy_create_buffer( incFile, YY_BUF_SIZE ) ); + BEGIN(INITIAL); + } + else { + error(*yylloc) << "could not locate include file \"" << incFileName + << "\"" << endl; + } + } + } + } +} + +void popInclude() +{ + /* Free the current buffer and move to the previous. */ + yy_delete_buffer( YY_CURRENT_BUFFER ); + inc_stack_ptr -= 1; + yy_switch_to_buffer( buff_stack[inc_stack_ptr] ); + multiline = multiline_stack[inc_stack_ptr]; + + /* Includes get called only from RL_INITIAL. */ + BEGIN(RL_INITIAL); +} + + +/* Append data to the end of a token and emitToken it to the parser. */ +int emitToken( int token, char *data, int len ) +{ + /* Append any new data. */ + if ( data != 0 && len > 0 ) + tokbuf.append( data, len ); + + /* Duplicate the buffer. */ + yylval->data.length = tokbuf.length; + yylval->data.data = new char[tokbuf.length+1]; + memcpy( yylval->data.data, tokbuf.data, tokbuf.length ); + yylval->data.data[tokbuf.length] = 0; + + /* Update line information from yytext. */ + updateLineInfo(); + + /* Write token info. */ +#ifdef WANT_TOKEN_WRITE + writeToken( token, tokbuf.data ); +#endif + + /* Clear out the buffer. */ + tokbuf.clear(); + + /* The next token starts ahead of the last token. */ + id->first_line = id->last_line; + id->first_column = id->last_column + 1; + + /* Maintain a record of two tokens back. */ + previous_tokens[1] = previous_tokens[0]; + previous_tokens[0] = token; + + /* Possibly process the include statement; */ + processInclude( token ); + + return token; +} + +/* Emit a token with no data to the parser. */ +int emitNoData( int token ) +{ + /* Return null to the parser. */ + yylval->data.data = 0; + yylval->data.length = 0; + + /* Update line information from yytext. */ + updateLineInfo(); + + /* Write token info. 
*/ +#ifdef WANT_TOKEN_WRITE + writeToken( token, 0 ); +#endif + + /* Clear out the buffer. */ + tokbuf.clear(); + + /* The next token starts ahead of the last token. */ + id->first_line = id->last_line; + id->first_column = id->last_column + 1; + + /* Maintain a record of two tokens back. */ + previous_tokens[1] = previous_tokens[0]; + previous_tokens[0] = token; + + /* Possibly process the include statement; */ + processInclude( token ); + + return token; +} + +/* Pass tokens in outer code through to the output. */ +void passThrough( char *data ) +{ + /* If no errors and we are at the bottom of the include stack (the source + * file listed on the command line) then write out the data. */ + if ( gblErrorCount == 0 && inc_stack_ptr == 0 && + machineSpec == 0 && machineName == 0 ) + { + xmlEscapeHost( *outStream, data ); + } +} + +/* Init a buffer. */ +Buffer::Buffer() +: + data(0), + length(0), + allocated(0) +{ +} + +/* Empty out a buffer on destruction. */ +Buffer::~Buffer() +{ + empty(); +} + +/* Free the space allocated for the buffer. */ +void Buffer::empty() +{ + if ( data != 0 ) { + free( data ); + + data = 0; + length = 0; + allocated = 0; + } +} + +/* Resize the buffer's allocation to len bytes: malloc on first use, realloc + * afterwards. If the allocation fails the old block is left untouched, a + * message is written to stderr and the program exits with status 1. */ +void Buffer::upAllocate( int len ) +{ + /* Allocate into a temporary so that a failed realloc cannot overwrite + * (and so leak) the block that data still points at. */ + char *newData; + if ( data == 0 ) + newData = (char*) malloc( len ); + else + newData = (char*) realloc( data, len ); + + /* A null result for a zero length request is not treated as a failure. */ + if ( newData == 0 && len > 0 ) { + fputs( "out of memory\n", stderr ); + exit( 1 ); + } + + data = newData; + allocated = len; +} + +int yywrap() +{ + /* Once processing of the input is done, signal no more. */ + return 1; +} + +/* Here simply to suppress the unused yyunput warning. */ +void thisFuncIsNeverCalled() +{ + yyunput(0, 0); +} + +/* Put the scanner back into the outside code start state. */ +void beginOutsideCode() +{ + BEGIN(INITIAL); +} + +/* Determine if we are opening a machine specification block.
*/ +bool openMachineSpecBlock() +{ + if ( previous_tokens[1] == TK_Section && previous_tokens[0] == TK_Word ) + return true; + else if ( previous_tokens[0] == TK_Section ) + return true; + return false; +} + +/* Wrapper for the lexer which stores the locations of the value and location + * variables of the parser into globals. The parser is reentrant, however the scanner + * does not need to be, so globals work fine. This saves us passing them around + * all the helper functions. */ +int yylex( YYSTYPE *yylval, YYLTYPE *yylloc ) +{ + ::yylval = yylval; + ::yylloc = yylloc; + return ragel_lex(); +} + diff --git a/ragel/rlscan.rl b/ragel/rlscan.rl new file mode 100644 index 0000000..b1396f1 --- /dev/null +++ b/ragel/rlscan.rl @@ -0,0 +1,907 @@ +/* + * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <iostream> +#include <fstream> +#include <string.h> + +#include "ragel.h" +#include "rlparse.h" +#include "parsedata.h" +#include "avltree.h" +#include "vector.h" + + +using std::ifstream; +using std::istream; +using std::ostream; +using std::cout; +using std::cerr; +using std::endl; + +/* This is used for tracking the current stack of include file/machine pairs. 
It + * is used to detect a recursive include structure. */ +struct IncludeStackItem +{ + IncludeStackItem( char *fileName, char *sectionName ) + : fileName(fileName), sectionName(sectionName) {} + + char *fileName; + char *sectionName; +}; + +typedef Vector<IncludeStackItem> IncludeStack; +IncludeStack includeStack; + +enum InlineBlockType +{ + CurlyDelimited, + SemiTerminated +}; + +struct Scanner +{ + Scanner( char *fileName, istream &input, + Parser *inclToParser, char *inclSectionTarg, + int include_depth ) + : + fileName(fileName), input(input), + inclToParser(inclToParser), + inclSectionTarg(inclSectionTarg), + include_depth(include_depth), + line(1), column(1), lastnl(0), + parser(0), active(false), + parserExistsError(false), ragelDefOpen(false), + whitespaceOn(true) + {} + + bool recursiveInclude( IncludeStack &includeStack, + char *inclFileName, char *inclSectionName ); + + char *prepareFileName( char *fileName, int len ) + { + bool caseInsensitive; + Token tokenFnStr, tokenRes; + tokenFnStr.data = fileName; + tokenFnStr.length = len; + tokenFnStr.prepareLitString( tokenRes, caseInsensitive ); + return tokenRes.data; + } + + void init(); + void token( int type, char *start, char *end ); + void token( int type, char *string ); + void token( int type ); + void updateCol(); + void startSection(); + void endSection(); + void openRagelDef(); + void do_scan(); + bool parserExists(); + ostream &error(); + + char *fileName; + istream &input; + Parser *inclToParser; + char *inclSectionTarg; + int include_depth; + + int cs; + int line; + char *word, *lit; + int word_len, lit_len; + InputLoc sectionLoc; + char *tokstart, *tokend; + int column; + char *lastnl; + + /* Set by machine statements, these persist from section to section + * allowing for unnamed sections. */ + Parser *parser; + bool active; + + /* This is set if ragel has already emitted an error stating that + * no section name has been seen and thus no parser exists.
*/ + bool parserExistsError; + bool ragelDefOpen; + + /* This is for inline code. By default it is on. It goes off for + * statements and values in inline blocks which are parsed. */ + bool whitespaceOn; +}; + +%%{ + machine section_parse; + alphtype int; + write data; +}%% + +void Scanner::init( ) +{ + %% write init; +} + +bool Scanner::parserExists() +{ + if ( parser != 0 ) + return true; + + if ( ! parserExistsError ) { + error() << "include: there is no previous specification name" << endl; + parserExistsError = true; + } + return false; +} + +ostream &Scanner::error() +{ + /* Maintain the error count. */ + gblErrorCount += 1; + + cerr << fileName << ":" << line << ":" << column << ": "; + return cerr; +} + +bool Scanner::recursiveInclude( IncludeStack &includeStack, + char *inclFileName, char *inclSectionName ) +{ + for ( IncludeStack::Iter si = includeStack; si.lte(); si++ ) { + if ( strcmp( si->fileName, inclFileName ) == 0 && + strcmp( si->sectionName, inclSectionName ) == 0 ) + { + return true; + } + } + return false; +} + +void Scanner::updateCol() +{ + char *from = lastnl; + if ( from == 0 ) + from = tokstart; + //cerr << "adding " << tokend - from << " to column" << endl; + column += tokend - from; + lastnl = 0; +} + +void Scanner::token( int type, char *string ) +{ + token( type, string, string + strlen(string) ); +} + +void Scanner::token( int type ) +{ + token( type, 0, 0 ); +} + +%%{ + machine section_parse; + + # This relies on the kelbt implementation and the order + # that tokens are declared.
+ KW_Machine = 128; + KW_Include = 129; + KW_Write = 130; + TK_Word = 131; + TK_Literal = 132; + + action clear_words { word = lit = 0; word_len = lit_len = 0; } + action store_word { word = tokdata; word_len = toklen; } + action store_lit { lit = tokdata; lit_len = toklen; } + + action mach_err { error() << "bad machine statement" << endl; } + action incl_err { error() << "bad include statement" << endl; } + action write_err { error() << "bad write statement" << endl; } + + action handle_machine + { + /* Assign a name to the machine. */ + char *machine = word; + //cerr << "scanner: machine statement: " << machine << endl; + + if ( inclSectionTarg == 0 ) { + active = true; + + ParserDictEl *pdEl = parserDict.find( machine ); + if ( pdEl != 0 ) { + //cerr << "scanner: using existing parser" << endl; + } + else { + //cerr << "scanner: creating a new parser" << endl; + pdEl = new ParserDictEl( machine ); + pdEl->value = new Parser( fileName, machine, sectionLoc ); + pdEl->value->init(); + parserDict.insert( pdEl ); + } + + parser = pdEl->value; + } + else if ( strcmp( inclSectionTarg, machine ) == 0 ) { + //cerr << "scanner: found include target" << endl; + active = true; + parser = inclToParser; + } + else { + //cerr << "scanner: ignoring section" << endl; + active = false; + parser = 0; + } + } + + machine_stmt = + ( KW_Machine TK_Word @store_word ';' ) @handle_machine + <>err mach_err <>eof mach_err; + + action handle_include + { + if ( active && parserExists() ) { + char *inclSectionName = word; + char *inclFileName = 0; + + /* Implement defaults for the input file and section name. */ + if ( inclSectionName == 0 ) + inclSectionName = parser->sectionName; + + if ( lit != 0 ) + inclFileName = prepareFileName( lit, lit_len ); + else + inclFileName = fileName; + + /* Open the file and process it. */ + //cerr << "scanner: include: " << inclSectionName << " " << inclFileName << endl; + + /* Check for a recursive include structure. 
Add the current file/section + * name then check if what we are including is already in the stack. */ + includeStack.append( IncludeStackItem( fileName, parser->sectionName ) ); + + if ( recursiveInclude( includeStack, inclFileName, inclSectionName ) ) + error() << "include: this is a recursive include operation" << endl; + else { + /* Open the input file for reading. */ + ifstream *inFile = new ifstream( inclFileName ); + if ( ! inFile->is_open() ) { + error() << "include: could not open " << + inclFileName << " for reading" << endl; + } + + Scanner scanner( inclFileName, *inFile, parser, + inclSectionName, include_depth+1 ); + scanner.init(); + scanner.do_scan( ); + delete inFile; + } + + /* Remove the last element (len-1) */ + includeStack.remove( -1 ); + } + } + + include_names = ( + TK_Word @store_word ( TK_Literal @store_lit )? | + TK_Literal @store_lit + ) >clear_words; + + include_stmt = + ( KW_Include include_names ';' ) @handle_include + <>err incl_err <>eof incl_err; + + action write_command + { + if ( active ) { + openRagelDef(); + if ( strcmp( tokdata, "data" ) != 0 && + strcmp( tokdata, "init" ) != 0 && + strcmp( tokdata, "exec" ) != 0 && + strcmp( tokdata, "eof" ) != 0 ) + { + error() << "unknown write command" << endl; + } + *outStream << " <write what=\"" << tokdata << "\">"; + } + } + + action write_option + { + if ( active ) + *outStream << "<option>" << tokdata << "</option>"; + } + action write_close + { + if ( active ) + *outStream << "</write>\n"; + } + + write_stmt = + ( KW_Write TK_Word @write_command + ( TK_Word @write_option )* ';' @write_close ) + <>err write_err <>eof write_err; + + action handle_token + { + /* Send the token off to the parser. 
*/ + if ( active && parserExists() ) { + InputLoc loc; + + //cerr << "scanner:" << line << ":" << column << + // ": sending token to the parser " << lelNames[*p]; + //if ( tokdata != 0 ) + // cerr << " " << tokdata; + //cerr << endl; + + loc.fileName = fileName; + loc.line = line; + loc.col = column; + + parser->token( loc, type, tokdata, toklen ); + } + } + + # Catch everything else. + everything_else = ^( KW_Machine | KW_Include | KW_Write ) @handle_token; + + main := ( + machine_stmt | + include_stmt | + write_stmt | + everything_else + )*; +}%% + +void Scanner::token( int type, char *start, char *end ) +{ + char *tokdata = 0; + int toklen = 0; + int *p = &type; + int *pe = &type + 1; + + if ( start != 0 ) { + toklen = end-start; + tokdata = new char[toklen+1]; + memcpy( tokdata, start, toklen ); + tokdata[toklen] = 0; + } + + %%{ + machine section_parse; + write exec; + }%% + + updateCol(); +} + +void Scanner::startSection( ) +{ + parserExistsError = false; + + if ( include_depth == 0 ) { + if ( machineSpec == 0 && machineName == 0 ) + *outStream << "</host>\n"; + ragelDefOpen = false; + } + + sectionLoc.fileName = fileName; + sectionLoc.line = line; + sectionLoc.col = 0; +} + +void Scanner::openRagelDef() +{ + if ( ! ragelDefOpen ) { + ragelDefOpen = true; + *outStream << "<ragel_def name=\"" << parser->sectionName << "\">\n"; + } +} + +void Scanner::endSection( ) +{ + /* Execute the eof actions for the section parser. */ + %%{ + machine section_parse; + write eof; + }%% + + /* Close off the section with the parser. */ + if ( active && parserExists() ) { + InputLoc loc; + loc.fileName = fileName; + loc.line = line; + loc.col = 0; + + parser->token( loc, TK_EndSection, 0, 0 ); + } + + if ( include_depth == 0 ) { + if ( ragelDefOpen ) { + *outStream << "</ragel_def>\n"; + ragelDefOpen = false; + } + + if ( machineSpec == 0 && machineName == 0 ) { + /* The end section may include a newline on the end, so + * we use the last line, which will count the newline. 
*/ + *outStream << "<host line=\"" << line << "\">"; + } + } +} + +%%{ + machine rlscan; + + # This is sent by the driver code. + EOF = 0; + + action inc_nl { + lastnl = p; + column = 0; + line++; + } + NL = '\n' @inc_nl; + + # Identifiers, numbers, comments, and other common things. + ident = ( alpha | '_' ) ( alpha |digit |'_' )*; + number = digit+; + hex_number = '0x' [0-9a-fA-F]+; + + c_comment = + '/*' ( any | NL )* :>> '*/'; + + cpp_comment = + '//' [^\n]* NL; + + c_cpp_comment = c_comment | cpp_comment; + + # These literal forms are common to C-like host code and ragel. + s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'"; + d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"'; + + whitespace = [ \t] | NL; + pound_comment = '#' [^\n]* NL; + + # An inline block of code. This is specified as a scanner, but is sent to + # the parser as one long block. The inline_block pointer is used to handle + # the preservation of the data. + inline_code := |* + # Inline expression keywords. + "fpc" => { token( KW_PChar ); }; + "fc" => { token( KW_Char ); }; + "fcurs" => { token( KW_CurState ); }; + "ftargs" => { token( KW_TargState ); }; + "fentry" => { + whitespaceOn = false; + token( KW_Entry ); + }; + + # Inline statement keywords.
+ "fhold" => { + whitespaceOn = false; + token( KW_Hold ); + }; + "fexec" => { token( KW_Exec, 0, 0 ); }; + "fgoto" => { + whitespaceOn = false; + token( KW_Goto ); + }; + "fnext" => { + whitespaceOn = false; + token( KW_Next ); + }; + "fcall" => { + whitespaceOn = false; + token( KW_Call ); + }; + "fret" => { + whitespaceOn = false; + token( KW_Ret ); + }; + "fbreak" => { + whitespaceOn = false; + token( KW_Break ); + }; + + ident => { token( TK_Word, tokstart, tokend ); }; + + number => { token( TK_UInt, tokstart, tokend ); }; + hex_number => { token( TK_Hex, tokstart, tokend ); }; + + ( s_literal | d_literal ) + => { token( IL_Literal, tokstart, tokend ); }; + + whitespace+ => { + if ( whitespaceOn ) + token( IL_WhiteSpace, tokstart, tokend ); + }; + c_cpp_comment => { token( IL_Comment, tokstart, tokend ); }; + + "::" => { token( TK_NameSep, tokstart, tokend ); }; + + # Some symbols need to go to the parser as with their cardinal value as + # the token type (as opposed to being sent as anonymous symbols) + # because they are part of the sequences which we interpret. The * ) ; + # symbols cause whitespace parsing to come back on. This gets turned + # off by some keywords. + + ";" => { + whitespaceOn = true; + token( *tokstart, tokstart, tokend ); + if ( inlineBlockType == SemiTerminated ) + fgoto parser_def; + }; + + [*)] => { + whitespaceOn = true; + token( *tokstart, tokstart, tokend ); + }; + + [,(] => { token( *tokstart, tokstart, tokend ); }; + + '{' => { + token( IL_Symbol, tokstart, tokend ); + curly_count += 1; + }; + + '}' => { + if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) { + /* Inline code block ends. */ + token( '}' ); + fgoto parser_def; + } + else { + /* Either a semi terminated inline block or only the closing + * brace of some inner scope, not the block's closing brace. */ + token( IL_Symbol, tokstart, tokend ); + } + }; + + # Send every other character as a symbol. 
+ any => { token( IL_Symbol, tokstart, tokend ); }; + *|; + + # Scanner for the body of an OR expression. It is entered with fcall and + # the closing ']' returns to the calling scanner with fret. + or_literal := |* + # Escape sequences in OR expressions. + # The NUL escape is sent through the (start, end) form of token() as a + # one byte range. The string form measures its argument with strlen, + # which is zero for "\0". + '\\0' => { char nul = 0; token( RE_Char, &nul, &nul + 1 ); }; + '\\a' => { token( RE_Char, "\a" ); }; + '\\b' => { token( RE_Char, "\b" ); }; + '\\t' => { token( RE_Char, "\t" ); }; + '\\n' => { token( RE_Char, "\n" ); }; + '\\v' => { token( RE_Char, "\v" ); }; + '\\f' => { token( RE_Char, "\f" ); }; + '\\r' => { token( RE_Char, "\r" ); }; + '\\\n' => { updateCol(); }; + '\\' any => { token( RE_Char, tokstart+1, tokend ); }; + + # Range dash in an OR expression. + '-' => { token( RE_Dash, 0, 0 ); }; + + # Terminate an OR expression. + ']' => { token( RE_SqClose ); fret; }; + + # Characters in an OR expression. + [^\]] => { token( RE_Char, tokstart, tokend ); }; + *|; + + # Scanner for the body of a regular expression. The closing '/', with an + # optional trailing 'i', is sent as RE_Slash and control goes back to + # parser_def. + re_literal := |* + # Escape sequences in regular expressions. + # The NUL escape is sent as a one byte range, see or_literal. + '\\0' => { char nul = 0; token( RE_Char, &nul, &nul + 1 ); }; + '\\a' => { token( RE_Char, "\a" ); }; + '\\b' => { token( RE_Char, "\b" ); }; + '\\t' => { token( RE_Char, "\t" ); }; + '\\n' => { token( RE_Char, "\n" ); }; + '\\v' => { token( RE_Char, "\v" ); }; + '\\f' => { token( RE_Char, "\f" ); }; + '\\r' => { token( RE_Char, "\r" ); }; + '\\\n' => { updateCol(); }; + '\\' any => { token( RE_Char, tokstart+1, tokend ); }; + + # Terminate a regular expression. + '/' [i]? => { + token( RE_Slash, tokstart, tokend ); + fgoto parser_def; + }; + + # Special characters. + '.' => { token( RE_Dot ); }; + '*' => { token( RE_Star ); }; + + '[' => { token( RE_SqOpen ); fcall or_literal; }; + '[^' => { token( RE_SqOpenNeg ); fcall or_literal; }; + + # Characters in a regular expression. + [^\/] => { token( RE_Char, tokstart, tokend ); }; + *|; + + # Scanner for the rest of a write statement. Words are sent as TK_Word, + # whitespace is skipped and ';' returns to parser_def. + write_statement := |* + ident => { token( TK_Word, tokstart, tokend ); } ; + [ \t\n]+ => { updateCol(); }; + ';' => { token( ';' ); fgoto parser_def; }; + *|; + + # Parser definitions.
+ parser_def := |* + 'machine' => { token( KW_Machine ); }; + 'include' => { token( KW_Include ); }; + 'write' => { + token( KW_Write ); + fgoto write_statement; + }; + 'action' => { token( KW_Action ); }; + 'alphtype' => { token( KW_AlphType ); }; + 'range' => { token( KW_Range ); }; + 'getkey' => { + token( KW_GetKey ); + inlineBlockType = SemiTerminated; + fgoto inline_code; + }; + 'access' => { + token( KW_Access ); + inlineBlockType = SemiTerminated; + fgoto inline_code; + }; + 'variable' => { + token( KW_Variable ); + inlineBlockType = SemiTerminated; + fgoto inline_code; + }; + 'when' => { token( KW_When ); }; + 'eof' => { token( KW_Eof ); }; + 'err' => { token( KW_Err ); }; + 'lerr' => { token( KW_Lerr ); }; + 'to' => { token( KW_To ); }; + 'from' => { token( KW_From ); }; + + # Identifiers. + ident => { token( TK_Word, tokstart, tokend ); } ; + + # Numbers + number => { token( TK_UInt, tokstart, tokend ); }; + hex_number => { token( TK_Hex, tokstart, tokend ); }; + + # Literals, with optionals. + ( s_literal | d_literal ) [i]? + => { token( TK_Literal, tokstart, tokend ); }; + + '[' => { token( RE_SqOpen ); fcall or_literal; }; + '[^' => { token( RE_SqOpenNeg ); fcall or_literal; }; + + '/' => { token( RE_Slash ); fgoto re_literal; }; + + # Ignore. + pound_comment => { updateCol(); }; + + ':=' => { token( TK_ColonEquals ); }; + + # To State Actions. + ">~" => { token( TK_StartToState ); }; + "$~" => { token( TK_AllToState ); }; + "%~" => { token( TK_FinalToState ); }; + "<~" => { token( TK_NotStartToState ); }; + "@~" => { token( TK_NotFinalToState ); }; + "<>~" => { token( TK_MiddleToState ); }; + + # From State actions + ">*" => { token( TK_StartFromState ); }; + "$*" => { token( TK_AllFromState ); }; + "%*" => { token( TK_FinalFromState ); }; + "<*" => { token( TK_NotStartFromState ); }; + "@*" => { token( TK_NotFinalFromState ); }; + "<>*" => { token( TK_MiddleFromState ); }; + + # EOF Actions. 
+ ">/" => { token( TK_StartEOF ); }; + "$/" => { token( TK_AllEOF ); }; + "%/" => { token( TK_FinalEOF ); }; + "</" => { token( TK_NotStartEOF ); }; + "@/" => { token( TK_NotFinalEOF ); }; + "<>/" => { token( TK_MiddleEOF ); }; + + # Global Error actions. + ">!" => { token( TK_StartGblError ); }; + "$!" => { token( TK_AllGblError ); }; + "%!" => { token( TK_FinalGblError ); }; + "<!" => { token( TK_NotStartGblError ); }; + "@!" => { token( TK_NotFinalGblError ); }; + "<>!" => { token( TK_MiddleGblError ); }; + + # Local error actions. + ">^" => { token( TK_StartLocalError ); }; + "$^" => { token( TK_AllLocalError ); }; + "%^" => { token( TK_FinalLocalError ); }; + "<^" => { token( TK_NotStartLocalError ); }; + "@^" => { token( TK_NotFinalLocalError ); }; + "<>^" => { token( TK_MiddleLocalError ); }; + + # Middle. + "<>" => { token( TK_Middle ); }; + + # Conditions. + '>?' => { token( TK_StartCond ); }; + '$?' => { token( TK_AllCond ); }; + '%?' => { token( TK_LeavingCond ); }; + + '..' => { token( TK_DotDot ); }; + '**' => { token( TK_StarStar ); }; + '--' => { token( TK_DashDash ); }; + '->' => { token( TK_Arrow ); }; + '=>' => { token( TK_DoubleArrow ); }; + + ":>" => { token( TK_ColonGt ); }; + ":>>" => { token( TK_ColonGtGt ); }; + "<:" => { token( TK_LtColon ); }; + + # Opening of longest match. + "|*" => { token( TK_BarStar ); }; + + '}%%' => { + /* In order to generate anything we must be in the top level file + * and the current spec must be active and there must not have been + * any parse errors. */ + updateCol(); + endSection(); + fgoto main; + }; + + [ \t]+ => { updateCol(); }; + + # If we are in a single line machine then newline may end the spec. + NL => { + updateCol(); + if ( singleLineSpec ) { + /* In order to generate anything we must be in the top level file + * and the current spec must be active and there must not have been + * any parse errors. 
*/ + endSection(); + fgoto main; + } + }; + + '{' => { + token( '{' ); + curly_count = 1; + inlineBlockType = CurlyDelimited; + fgoto inline_code; + }; + + any => { token( *tokstart ); } ; + *|; + + action pass { + updateCol(); + + /* If no errors and we are at the bottom of the include stack (the + * source file listed on the command line) then write out the data. */ + if ( include_depth == 0 && machineSpec == 0 && machineName == 0 ) + xmlEscapeHost( *outStream, tokstart, tokend-tokstart ); + } + + # Outside code scanner. These tokens get passed through. + main := |* + ident => pass; + number => pass; + c_cpp_comment => pass; + s_literal | d_literal => pass; + '%%{' => { + updateCol(); + singleLineSpec = false; + startSection(); + fgoto parser_def; + }; + '%%' => { + updateCol(); + singleLineSpec = true; + startSection(); + fgoto parser_def; + }; + whitespace+ => pass; + EOF; + any => pass; + *|; + +}%% + +%% write data; + +void Scanner::do_scan() +{ + int bufsize = 8; + char *buf = new char[bufsize]; + const char last_char = 0; + int cs, act, have = 0; + int top, stack[1]; + int curly_count = 0; + bool execute = true; + bool singleLineSpec = false; + InlineBlockType inlineBlockType; + + %% write init; + + while ( execute ) { + char *p = buf + have; + int space = bufsize - have; + + if ( space == 0 ) { + /* We filled up the buffer trying to scan a token. Grow it. */ + bufsize = bufsize * 2; + char *newbuf = new char[bufsize]; + //cerr << "FULL BUFFER, NEW SIZE: " << bufsize << endl; + + /* Recompute p and space. */ + p = newbuf + have; + space = bufsize - have; + + /* Patch up pointers possibly in use. */ + if ( tokstart != 0 ) + tokstart = newbuf + ( tokstart - buf ); + tokend = newbuf + ( tokend - buf ); + + /* Copy the new buffer in. */ + memcpy( newbuf, buf, have ); + delete[] buf; + buf = newbuf; + } + + input.read( p, space ); + int len = input.gcount(); + + /* If we see eof then append the EOF char. 
*/ + if ( len == 0 ) { + p[0] = last_char, len = 1; + execute = false; + } + + char *pe = p + len; + %% write exec; + + /* Check if we failed. */ + if ( cs == rlscan_error ) { + /* Machine failed before finding a token. */ + //cerr << "PARSE ERROR" << endl; + exit(1); + } + + /* Decide if we need to preserve anything. */ + char *preserve = tokstart; + + /* Now set up the prefix. */ + if ( preserve == 0 ) + have = 0; + else { + /* There is data that needs to be shifted over. */ + have = pe - preserve; + memmove( buf, preserve, have ); + unsigned int shiftback = preserve - buf; + if ( tokstart != 0 ) + tokstart -= shiftback; + tokend -= shiftback; + + preserve = buf; + } + } + + delete[] buf; +} + +void scan( char *fileName, istream &input ) +{ + Scanner scanner( fileName, input, 0, 0, 0 ); + scanner.init(); + scanner.do_scan(); +} + diff --git a/ragel/xmlcodegen.cpp b/ragel/xmlcodegen.cpp new file mode 100644 index 0000000..bc9c155 --- /dev/null +++ b/ragel/xmlcodegen.cpp @@ -0,0 +1,675 @@ +/* + * Copyright 2005, 2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#include "ragel.h" +#include "xmlcodegen.h" +#include "parsedata.h" +#include "fsmgraph.h" +#include <string.h> + +using namespace std; + +XMLCodeGen::XMLCodeGen( char *fsmName, ParseData *pd, FsmAp *fsm, + std::ostream &out ) +: + fsmName(fsmName), + pd(pd), + fsm(fsm), + out(out), + nextActionTableId(0) +{ +} + + +void XMLCodeGen::writeActionList() +{ + /* Determine which actions to write. */ + int nextActionId = 0; + for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) { + if ( act->numRefs() > 0 || act->numCondRefs > 0 ) + act->actionId = nextActionId++; + } + + /* Write the list. */ + out << " <action_list length=\"" << nextActionId << "\">\n"; + for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) { + if ( act->actionId >= 0 ) + writeAction( act ); + } + out << " </action_list>\n"; +} + +void XMLCodeGen::writeActionTableList() +{ + /* Must first order the action tables based on their id. */ + int numTables = nextActionTableId; + RedActionTable **tables = new RedActionTable*[numTables]; + for ( ActionTableMap::Iter at = actionTableMap; at.lte(); at++ ) + tables[at->id] = at; + + out << " <action_table_list length=\"" << numTables << "\">\n"; + for ( int t = 0; t < numTables; t++ ) { + out << " <action_table id=\"" << t << "\" length=\"" << + tables[t]->key.length() << "\">"; + for ( ActionTable::Iter atel = tables[t]->key; atel.lte(); atel++ ) { + out << atel->value->actionId; + if ( ! atel.last() ) + out << " "; + } + out << "</action_table>\n"; + } + out << " </action_table_list>\n"; + + delete[] tables; +} + +void XMLCodeGen::reduceActionTables() +{ + /* Reduce the actions tables to a set. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + RedActionTable *actionTable = 0; + + /* Reduce To State Actions. 
*/ + if ( st->toStateActionTable.length() > 0 ) { + if ( actionTableMap.insert( st->toStateActionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + + /* Reduce From State Actions. */ + if ( st->fromStateActionTable.length() > 0 ) { + if ( actionTableMap.insert( st->fromStateActionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + + /* Reduce EOF actions. */ + if ( st->eofActionTable.length() > 0 ) { + if ( actionTableMap.insert( st->eofActionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + + /* Loop the transitions and reduce their actions. */ + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + if ( trans->actionTable.length() > 0 ) { + if ( actionTableMap.insert( trans->actionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + } + } +} + +void XMLCodeGen::appendTrans( TransListVect &outList, Key lowKey, + Key highKey, TransAp *trans ) +{ /* Keep the transition only if it has a target state or carries actions. */ + if ( trans->toState != 0 || trans->actionTable.length() > 0 ) + outList.append( TransEl( lowKey, highKey, trans ) ); +} + +void XMLCodeGen::writeKey( Key key ) +{ /* Print the key value as signed, or cast to unsigned long, according to keyOps->isSigned. */ + if ( keyOps->isSigned ) + out << key.getVal(); + else + out << (unsigned long) key.getVal(); +} + +void XMLCodeGen::writeTrans( Key lowKey, Key highKey, TransAp *trans ) +{ /* Emit one <t> element: low key, high key, target state number and action table id; an x stands in for a missing target or action table. */ + /* Look up the reduced action table for this transition, if it has any actions. */ + RedActionTable *actionTable = 0; + if ( trans->actionTable.length() > 0 ) + actionTable = actionTableMap.find( trans->actionTable ); + + /* Write the transition. */ + out << " <t>"; + writeKey( lowKey ); + out << " "; + writeKey( highKey ); + + if ( trans->toState != 0 ) + out << " " << trans->toState->alg.stateNum; + else + out << " x"; + + if ( actionTable != 0 ) + out << " " << actionTable->id; + else + out << " x"; + out << "</t>\n"; +} + +void XMLCodeGen::writeTransList( StateAp *state ) +{ /* Emit the <trans_list> element for a state. */ + TransListVect outList; + + /* If the state has no outgoing ranges there is nothing to collect. */ + if ( state->outList.length() > 0 ) { + /* Loop each source range.
*/ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + /* Reduce the transition. If it reduced to anything then add it. */ + appendTrans( outList, trans->lowKey, trans->highKey, trans ); + } + } + + out << " <trans_list length=\"" << outList.length() << "\">\n"; + for ( TransListVect::Iter tvi = outList; tvi.lte(); tvi++ ) + writeTrans( tvi->lowKey, tvi->highKey, tvi->value ); + out << " </trans_list>\n"; +} + +void XMLCodeGen::writeLmSwitch( InlineItem *item ) +{ + LongestMatch *longestMatch = item->longestMatch; + + out << "<lm_switch"; + if ( longestMatch->lmSwitchHandlesError ) + out << " handles_error=\"t\""; + out << ">\n"; + + for ( LmPartList::Iter lmi = *longestMatch->longestMatchList; lmi.lte(); lmi++ ) { + if ( lmi->inLmSelect && lmi->action != 0 ) { + /* Open the action. Write it with the context that sets up _p + * when doing control flow changes from inside the machine. */ + out << " <sub_action id=\"" << lmi->longestMatchId << "\">"; + writeInlineList( lmi->action->inlineList, item ); + out << "</sub_action>\n"; + } + } + + out << " </lm_switch><exec><get_tokend></get_tokend></exec>"; +} + +void XMLCodeGen::writeText( InlineItem *item ) +{ + if ( item->prev == 0 || item->prev->type != InlineItem::Text ) + out << "<text>"; + xmlEscapeHost( out, item->data, strlen(item->data) ); + if ( item->next == 0 || item->next->type != InlineItem::Text ) + out << "</text>"; +} + +void XMLCodeGen::writeCtrlFlow( InlineItem *item, InlineItem *context ) +{ + if ( context != 0 ) { + out << "<sub_action>"; + + switch ( context->type ) { + case InlineItem::LmOnLast: + out << "<exec><get_tokend></get_tokend></exec>"; + break; + case InlineItem::LmOnNext: + out << "<exec><get_tokend></get_tokend></exec>"; + break; + case InlineItem::LmOnLagBehind: + out << "<exec><get_tokend></get_tokend></exec>"; + break; + case InlineItem::LmSwitch: + out << "<exec><get_tokend></get_tokend></exec>"; + break; + default: break; + } + } + + switch ( item->type ) { + 
case InlineItem::Goto: + writeGoto( item, context ); + break; + case InlineItem::GotoExpr: + writeGotoExpr( item, context ); + break; + case InlineItem::Call: + writeCall( item, context ); + break; + case InlineItem::CallExpr: + writeCallExpr( item, context ); + break; + case InlineItem::Next: + writeNext( item, context ); + break; + case InlineItem::NextExpr: + writeNextExpr( item, context ); + break; + case InlineItem::Break: + out << "<break></break>"; + break; + case InlineItem::Ret: + out << "<ret></ret>"; + break; + default: break; + } + + if ( context != 0 ) + out << "</sub_action>"; +} + +void XMLCodeGen::writePtrMod( InlineItem *item, InlineItem *context ) +{ + if ( context != 0 && ( context->type == InlineItem::LmOnNext || + context->type == InlineItem::LmOnLagBehind || + context->type == InlineItem::LmSwitch ) ) + { + switch ( item->type ) { + case InlineItem::Hold: + out << "<holdte></holdte>"; + break; + case InlineItem::Exec: + writeActionExecTE( item ); + break; + default: break; + } + } + else { + switch ( item->type ) { + case InlineItem::Hold: + out << "<hold></hold>"; + break; + case InlineItem::Exec: + writeActionExec( item ); + break; + default: break; + } + } +} + + +void XMLCodeGen::writeGoto( InlineItem *item, InlineItem *context ) +{ + if ( pd->generatingSectionSubset ) + out << "<goto>-1</goto>"; + else { + EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id ); + out << "<goto>" << targ->value->alg.stateNum << "</goto>"; + } +} + +void XMLCodeGen::writeCall( InlineItem *item, InlineItem *context ) +{ + if ( pd->generatingSectionSubset ) + out << "<call>-1</call>"; + else { + EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id ); + out << "<call>" << targ->value->alg.stateNum << "</call>"; + } +} + +void XMLCodeGen::writeNext( InlineItem *item, InlineItem *context ) +{ + if ( pd->generatingSectionSubset ) + out << "<next>-1</next>"; + else { + EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id ); + out << 
"<next>" << targ->value->alg.stateNum << "</next>"; + } +} + +void XMLCodeGen::writeGotoExpr( InlineItem *item, InlineItem *context ) +{ + out << "<goto_expr>"; + writeInlineList( item->children, 0 ); + out << "</goto_expr>"; +} + +void XMLCodeGen::writeCallExpr( InlineItem *item, InlineItem *context ) +{ + out << "<call_expr>"; + writeInlineList( item->children, 0 ); + out << "</call_expr>"; +} + +void XMLCodeGen::writeNextExpr( InlineItem *item, InlineItem *context ) +{ + out << "<next_expr>"; + writeInlineList( item->children, 0 ); + out << "</next_expr>"; +} + +void XMLCodeGen::writeEntry( InlineItem * item ) +{ + if ( pd->generatingSectionSubset ) + out << "<entry>-1</entry>"; + else { + EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id ); + out << "<entry>" << targ->value->alg.stateNum << "</entry>"; + } +} + +void XMLCodeGen::writeActionExec( InlineItem *item ) +{ + out << "<exec>"; + writeInlineList( item->children, 0 ); + out << "</exec>"; +} + +void XMLCodeGen::writeActionExecTE( InlineItem *item ) +{ + out << "<execte>"; + writeInlineList( item->children, 0 ); + out << "</execte>"; +} + +void XMLCodeGen::writeLmOnLast( InlineItem *item ) +{ + out << "<set_tokend>1</set_tokend>"; + if ( item->longestMatchPart->action != 0 ) { + out << "<sub_action>"; + writeInlineList( item->longestMatchPart->action->inlineList, item ); + out << "</sub_action>"; + } + out << "<exec><get_tokend></get_tokend></exec>"; +} + +void XMLCodeGen::writeLmOnNext( InlineItem *item ) +{ + out << "<set_tokend>0</set_tokend>"; + if ( item->longestMatchPart->action != 0 ) { + out << "<sub_action>"; + writeInlineList( item->longestMatchPart->action->inlineList, item ); + out << "</sub_action>"; + } + out << "<exec><get_tokend></get_tokend></exec>"; +} + +void XMLCodeGen::writeLmOnLagBehind( InlineItem *item ) +{ + if ( item->longestMatchPart->action != 0 ) { + out << "<sub_action>"; + writeInlineList( item->longestMatchPart->action->inlineList, item ); + out << 
"</sub_action>"; + } + out << "<exec><get_tokend></get_tokend></exec>"; +} + + +void XMLCodeGen::writeInlineList( InlineList *inlineList, InlineItem *context ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Text: + writeText( item ); + break; + case InlineItem::Goto: case InlineItem::GotoExpr: + case InlineItem::Call: case InlineItem::CallExpr: + case InlineItem::Next: case InlineItem::NextExpr: + case InlineItem::Break: case InlineItem::Ret: + writeCtrlFlow( item, context ); + break; + case InlineItem::PChar: + out << "<pchar></pchar>"; + break; + case InlineItem::Char: + out << "<char></char>"; + break; + case InlineItem::Curs: + out << "<curs></curs>"; + break; + case InlineItem::Targs: + out << "<targs></targs>"; + break; + case InlineItem::Entry: + writeEntry( item ); + break; + + case InlineItem::Hold: + case InlineItem::Exec: + writePtrMod( item, context ); + break; + + case InlineItem::LmSwitch: + writeLmSwitch( item ); + break; + case InlineItem::LmSetActId: + out << "<set_act>" << + item->longestMatchPart->longestMatchId << + "</set_act>"; + break; + case InlineItem::LmSetTokEnd: + out << "<set_tokend>1</set_tokend>"; + break; + case InlineItem::LmOnLast: + writeLmOnLast( item ); + break; + case InlineItem::LmOnNext: + writeLmOnNext( item ); + break; + case InlineItem::LmOnLagBehind: + writeLmOnLagBehind( item ); + break; + case InlineItem::LmInitAct: + out << "<init_act></init_act>"; + break; + case InlineItem::LmInitTokStart: + out << "<init_tokstart></init_tokstart>"; + break; + case InlineItem::LmSetTokStart: + out << "<set_tokstart></set_tokstart>"; + break; + } + } +} + +void XMLCodeGen::writeAction( Action *action ) +{ + out << " <action id=\"" << action->actionId << "\""; + if ( action->name != 0 ) + out << " name=\"" << action->name << "\""; + out << " line=\"" << action->loc.line << "\" col=\"" << action->loc.col << "\">"; + writeInlineList( action->inlineList, 0 ); + out << 
"</action>\n"; +} + +void xmlEscapeHost( std::ostream &out, char *data, int len ) +{ /* Copy len bytes from data to out, replacing '<', '>' and '&' with the XML entities &lt;, &gt; and &amp; so host code cannot break the surrounding markup. */ + char *end = data + len; + while ( data != end ) { + switch ( *data ) { + case '<': out << "&lt;"; break; + case '>': out << "&gt;"; break; + case '&': out << "&amp;"; break; + default: out << *data; break; + } + data += 1; + } +} + +void XMLCodeGen::writeStateActions( StateAp *state ) +{ /* Emit <state_actions> holding the to-state, from-state and eof action table ids, with x where one is absent; nothing is written when the state has none of them. */ + RedActionTable *toStateActions = 0; + if ( state->toStateActionTable.length() > 0 ) + toStateActions = actionTableMap.find( state->toStateActionTable ); + + RedActionTable *fromStateActions = 0; + if ( state->fromStateActionTable.length() > 0 ) + fromStateActions = actionTableMap.find( state->fromStateActionTable ); + + RedActionTable *eofActions = 0; + if ( state->eofActionTable.length() > 0 ) + eofActions = actionTableMap.find( state->eofActionTable ); + + if ( toStateActions != 0 || fromStateActions != 0 || eofActions != 0 ) { + out << " <state_actions>"; + if ( toStateActions != 0 ) + out << toStateActions->id; + else + out << "x"; + + if ( fromStateActions != 0 ) + out << " " << fromStateActions->id; + else + out << " x"; + + if ( eofActions != 0 ) + out << " " << eofActions->id; + else + out << " x"; out << "</state_actions>\n"; + } +} + +void XMLCodeGen::writeStateConditions( StateAp *state ) +{ /* Emit <cond_list> with one <c> entry (low key, high key, cond space id) per element of stateCondList; nothing is written for an empty list. */ + if ( state->stateCondList.length() > 0 ) { + out << " <cond_list length=\"" << state->stateCondList.length() << "\">\n"; + for ( StateCondList::Iter scdi = state->stateCondList; scdi.lte(); scdi++ ) { + out << " <c>"; + writeKey( scdi->lowKey ); + out << " "; + writeKey( scdi->highKey ); + out << " "; + out << scdi->condSpace->condSpaceId; + out << "</c>\n"; + } + out << " </cond_list>\n"; + } +} + +void XMLCodeGen::writeStateList() +{ + /* Write the list of states.
*/ + out << " <state_list length=\"" << fsm->stateList.length() << "\">\n"; + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + out << " <state id=\"" << st->alg.stateNum << "\""; + if ( st->isFinState() ) + out << " final=\"t\""; + out << ">\n"; + + writeStateActions( st ); + writeStateConditions( st ); + writeTransList( st ); + + out << " </state>\n"; + + if ( !st.last() ) + out << "\n"; + + } + out << " </state_list>\n"; +} + +void XMLCodeGen::writeEntryPoints() +{ + /* List of entry points other than start state. */ + if ( fsm->entryPoints.length() > 0 || pd->lmRequiresErrorState ) { + out << " <entry_points"; + if ( pd->lmRequiresErrorState ) + out << " error=\"t\""; + out << ">\n"; + for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) { + /* Get the name instantiation from nameIndex. */ + NameInst *nameInst = pd->nameIndex[en->key]; + StateAp *state = en->value; + out << " <entry name=\"" << nameInst->name << "\">" << + state->alg.stateNum << "</entry>\n"; + } + out << " </entry_points>\n"; + } +} + +void XMLCodeGen::writeMachine() +{ + fsm->setStateNumbers(); + + /* Open the machine. */ + out << " <machine>\n"; + + /* Action tables. */ + reduceActionTables(); + + writeActionList(); + writeActionTableList(); + writeConditions(); + + /* Start state. 
*/ + out << " <start_state>" << fsm->startState->alg.stateNum << + "</start_state>\n"; + + writeEntryPoints(); + writeStateList(); + + out << " </machine>\n"; +} + +void XMLCodeGen::writeAlphType() +{ + out << " <alphtype>" << + (keyOps->alphType - hostLang->hostTypes) << "</alphtype>\n"; +} + +void XMLCodeGen::writeGetKeyExpr() +{ + out << " <getkey>"; + writeInlineList( pd->getKeyExpr, 0 ); + out << "</getkey>\n"; +} + +void XMLCodeGen::writeAccessExpr() +{ + out << " <access>"; + writeInlineList( pd->accessExpr, 0 ); + out << "</access>\n"; +} + +void XMLCodeGen::writeCurStateExpr() +{ + out << " <curstate>"; + writeInlineList( pd->curStateExpr, 0 ); + out << "</curstate>\n"; +} + +void XMLCodeGen::writeConditions() +{ + if ( condData->condSpaceMap.length() > 0 ) { + long nextCondSpaceId = 0; + for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) + cs->condSpaceId = nextCondSpaceId++; + + out << " <cond_space_list length=\"" << condData->condSpaceMap.length() << "\">\n"; + for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) { + out << " <cond_space id=\"" << cs->condSpaceId << + "\" length=\"" << cs->condSet.length() << "\">"; + writeKey( cs->baseKey ); + for ( CondSet::Iter csi = cs->condSet; csi.lte(); csi++ ) + out << " " << (*csi)->actionId; + out << "</cond_space>\n"; + } + out << " </cond_space_list>\n"; + } +} + +void XMLCodeGen::writeXML() +{ + /* Open the definition. */ + out << "<ragel_def name=\"" << fsmName << "\">\n"; + writeAlphType(); + + if ( pd->getKeyExpr != 0 ) + writeGetKeyExpr(); + + if ( pd->accessExpr != 0 ) + writeAccessExpr(); + + if ( pd->curStateExpr != 0 ) + writeCurStateExpr(); + + writeMachine(); + + out << + "</ragel_def>\n"; +} + diff --git a/ragel/xmlcodegen.h b/ragel/xmlcodegen.h new file mode 100644 index 0000000..ab08bc2 --- /dev/null +++ b/ragel/xmlcodegen.h @@ -0,0 +1,135 @@ +/* + * Copyright 2005, 2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. 
+ * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _XMLDOTGEN_H +#define _XMLDOTGEN_H + +#include <iostream> +#include "avltree.h" +#include "fsmgraph.h" +#include "parsedata.h" + +/* Forwards. */ +struct TransAp; +struct FsmAp; +struct ParseData; + +struct RedActionTable +: + public AvlTreeEl<RedActionTable> +{ + RedActionTable( const ActionTable &key ) + : + key(key), + id(0) + { } + + const ActionTable &getKey() + { return key; } + + ActionTable key; + int id; +}; + +typedef AvlTree<RedActionTable, ActionTable, CmpActionTable> ActionTableMap; + +struct NextRedTrans +{ + Key lowKey, highKey; + TransAp *trans; + TransAp *next; + + void load() { + if ( trans != 0 ) { + next = trans->next; + lowKey = trans->lowKey; + highKey = trans->highKey; + } + } + + NextRedTrans( TransAp *t ) { + trans = t; + load(); + } + + void increment() { + trans = next; + load(); + } +}; + +class XMLCodeGen +{ +public: + XMLCodeGen( char *fsmName, ParseData *pd, FsmAp *fsm, std::ostream &out ); + void writeXML( ); + +private: + void appendTrans( TransListVect &outList, Key lowKey, Key highKey, TransAp *trans ); + void writeStateActions( StateAp *state ); + void writeStateList(); + void writeStateConditions( StateAp *state ); + + void writeKey( Key key ); + void writeText( InlineItem *item ); + void writeCtrlFlow( InlineItem *item, 
InlineItem *context ); + void writePtrMod( InlineItem *item, InlineItem *context ); + void writeGoto( InlineItem *item, InlineItem *context ); + void writeGotoExpr( InlineItem *item, InlineItem *context ); + void writeCall( InlineItem *item, InlineItem *context ); + void writeCallExpr( InlineItem *item, InlineItem *context ); + void writeNext( InlineItem *item, InlineItem *context ); + void writeNextExpr( InlineItem *item, InlineItem *context ); + void writeEntry( InlineItem *item ); + void writeLmSetActId( InlineItem *item ); + void writeLmOnLast( InlineItem *item ); + void writeLmOnNext( InlineItem *item ); + void writeLmOnLagBehind( InlineItem *item ); + + void writeEntryPoints(); + void writeGetKeyExpr(); + void writeAccessExpr(); + void writeCurStateExpr(); + void writeConditions(); + void writeInlineList( InlineList *inlineList, InlineItem *context ); + void writeAlphType(); + void writeActionList(); + void writeActionTableList(); + void reduceTrans( TransAp *trans ); + void reduceActionTables(); + void writeTransList( StateAp *state ); + void writeTrans( Key lowKey, Key highKey, TransAp *defTrans ); + void writeAction( Action *action ); + void writeLmSwitch( InlineItem *item ); + void writeMachine(); + void writeActionExec( InlineItem *item ); + void writeActionExecTE( InlineItem *item ); + + char *fsmName; + ParseData *pd; + FsmAp *fsm; + std::ostream &out; + ActionTableMap actionTableMap; + int nextActionTableId; +}; + + +#endif /* _XMLDOTGEN_H */ diff --git a/rlcodegen/Makefile.in b/rlcodegen/Makefile.in new file mode 100644 index 0000000..c6a9838 --- /dev/null +++ b/rlcodegen/Makefile.in @@ -0,0 +1,93 @@ +# +# Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> +# + +# This file is part of Ragel. +# +# Ragel is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# Ragel is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ragel; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +INCS += -I../common -I../aapl +DEFS += + +CFLAGS += -g -Wall +LDFLAGS += + +CC_SRCS = \ + gendata.cpp xmltags.cpp xmlscan.cpp xmlparse.cpp \ + main.cpp redfsm.cpp gvdotgen.cpp fsmcodegen.cpp \ + tabcodegen.cpp ftabcodegen.cpp flatcodegen.cpp \ + fflatcodegen.cpp gotocodegen.cpp fgotocodegen.cpp \ + ipgotocodegen.cpp splitcodegen.cpp javacodegen.cpp + +GEN_SRC = xmltags.cpp xmlscan.cpp xmlparse.cpp xmlparse.h + +LIBS += @LIBS@ +PREFIX += @prefix@ + +BUILD_PARSERS = @BUILD_PARSERS@ + +#************************************* + +# Programs +CXX = @CXX@ + +# Get objects and dependencies from sources. +OBJS = $(CC_SRCS:%.cpp=%.o) +DEPS = $(CC_SRCS:%.cpp=.%.d) + +# Get the version info. +include ../version.mk + +# Rules. 
+all: rlcodegen + +rlcodegen: $(GEN_SRC) $(OBJS) + $(CXX) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) + +ifeq ($(BUILD_PARSERS),true) + +xmlparse.h: xmlparse.kh + kelbt -o $@ $< + +xmlparse.cpp: xmlparse.kl xmlparse.kh + kelbt -o $@ $< + +xmlscan.cpp: xmlscan.rl + ragel xmlscan.rl | rlcodegen -G2 -o xmlscan.cpp + +xmltags.cpp: xmltags.gperf + gperf -L C++ -t $< > $@ + +endif + +%.o: %.cpp + @$(CXX) -M $(DEFS) $(INCS) $< > .$*.d + $(CXX) -c $(CFLAGS) $(DEFS) $(INCS) -o $@ $< + +distclean: clean + rm -f Makefile + +ifeq ($(BUILD_PARSERS),true) +EXTRA_CLEAN = $(GEN_SRC) +endif + +clean: + rm -f tags .*.d *.o rlcodegen $(EXTRA_CLEAN) + +install: all + install -d $(PREFIX)/bin + install -s rlcodegen $(PREFIX)/bin/rlcodegen + +-include $(DEPS) diff --git a/rlcodegen/fflatcodegen.cpp b/rlcodegen/fflatcodegen.cpp new file mode 100644 index 0000000..25f4d24 --- /dev/null +++ b/rlcodegen/fflatcodegen.cpp @@ -0,0 +1,364 @@ +/* + * Copyright 2004-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Eric Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlcodegen.h"
+#include "fflatcodegen.h"
+#include "redfsm.h"
+#include "gendata.h"
+/* Write the to-state action table entry for a state: the id of its action
+ * list plus one, or zero when the state has no to-state action. */
+std::ostream &FFlatCodeGen::TO_STATE_ACTION( RedStateAp *state )
+{
+	int act = 0;
+	if ( state->toStateAction != 0 )
+		act = state->toStateAction->actListId+1;
+	out << act;
+	return out;
+}
+/* Write the from-state action table entry for a state: the id of its action
+ * list plus one, or zero when the state has no from-state action. */
+std::ostream &FFlatCodeGen::FROM_STATE_ACTION( RedStateAp *state )
+{
+	int act = 0;
+	if ( state->fromStateAction != 0 )
+		act = state->fromStateAction->actListId+1;
+	out << act;
+	return out;
+}
+/* Write the EOF action table entry for a state: the id of its action list
+ * plus one, or zero when the state has no EOF action. */
+std::ostream &FFlatCodeGen::EOF_ACTION( RedStateAp *state )
+{
+	int act = 0;
+	if ( state->eofAction != 0 )
+		act = state->eofAction->actListId+1;
+	out << act;
+	return out;
+}
+
+/* Write the action table entry for a transition: the id of its action list
+ * plus one, or zero when the transition has no action. */
+std::ostream &FFlatCodeGen::TRANS_ACTION( RedTransAp *trans )
+{
+	int action = 0;
+	if ( trans->action != 0 )
+		action = trans->action->actListId+1;
+	out << action;
+	return out;
+}
+
+/* Write the switch cases for to-state actions. One case is written per action
+ * list with to-state references; its label is the list id plus one, the same
+ * value TO_STATE_ACTION writes into the table. */
+std::ostream &FFlatCodeGen::TO_STATE_ACTION_SWITCH()
+{
+	/* Loop over the action lists in the action map. */
+	for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+		if ( redAct->numToStateRefs > 0 ) {
+			/* Write the entry label. */
+			out << "\tcase " << redAct->actListId+1 << ":\n";
+
+			/* Write each action in the list of action items. */
+			for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+				ACTION( out, item->value, 0, false );
+
+			out << "\tbreak;\n";
+		}
+	}
+
+	genLineDirective( out );
+	return out;
+}
+
+/* Write the switch cases for from-state actions. One case is written per
+ * action list with from-state references; its label is the list id plus one,
+ * the same value FROM_STATE_ACTION writes into the table. */
+std::ostream &FFlatCodeGen::FROM_STATE_ACTION_SWITCH()
+{
+	/* Loop over the action lists in the action map. */
+	for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+		if ( redAct->numFromStateRefs > 0 ) {
+			/* Write the entry label. */
+			out << "\tcase " << redAct->actListId+1 << ":\n";
+
+			/* Write each action in the list of action items. */
+			for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+				ACTION( out, item->value, 0, false );
+
+			out << "\tbreak;\n";
+		}
+	}
+
+	genLineDirective( out );
+	return out;
+}
+/* Write the switch cases for EOF actions. Same layout as the other switches,
+ * but ACTION is called with its last argument set to true (presumably the
+ * inFinish flag -- TODO confirm against the ACTION declaration). */
+std::ostream &FFlatCodeGen::EOF_ACTION_SWITCH()
+{
+	/* Loop over the action lists in the action map. */
+	for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+		if ( redAct->numEofRefs > 0 ) {
+			/* Write the entry label. */
+			out << "\tcase " << redAct->actListId+1 << ":\n";
+
+			/* Write each action in the list of action items. */
+			for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+				ACTION( out, item->value, 0, true );
+
+			out << "\tbreak;\n";
+		}
+	}
+
+	genLineDirective( out );
+	return out;
+}
+
+/* Write the switch cases for transition actions. One case is written per
+ * action list with transition references; its label is the list id plus one,
+ * the same value TRANS_ACTION writes into the table. */
+std::ostream &FFlatCodeGen::ACTION_SWITCH()
+{
+	/* Loop over the action lists in the action map. */
+	for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+		if ( redAct->numTransRefs > 0 ) {
+			/* Write the entry label. */
+			out << "\tcase " << redAct->actListId+1 << ":\n";
+
+			/* Write each action in the list of action items. */
+			for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+				ACTION( out, item->value, 0, false );
+
+			out << "\tbreak;\n";
+		}
+	}
+
+	genLineDirective( out );
+	return out;
+}
+/* Write the static data of the machine: the condition tables (only when the
+ * machine has conditions), keys, key spans, index offsets, indicies and
+ * transition targets, then whichever action tables are needed, and finally the
+ * start, first-final and error state constants. */
+void FFlatCodeGen::writeOutData()
+{
+	if ( anyConditions() ) {
+		OPEN_ARRAY( WIDE_ALPH_TYPE(), CK() );
+		COND_KEYS();
+		CLOSE_ARRAY() <<
+		"\n";
+
+		OPEN_ARRAY( ARRAY_TYPE(maxCondSpan), CSP() );
+		COND_KEY_SPANS();
+		CLOSE_ARRAY() <<
+		"\n";
+
+		OPEN_ARRAY( ARRAY_TYPE(maxCond), C() );
+		CONDS();
+		CLOSE_ARRAY() <<
+		"\n";
+
+		OPEN_ARRAY( ARRAY_TYPE(maxCondIndexOffset), CO() );
+		COND_INDEX_OFFSET();
+		CLOSE_ARRAY() <<
+		"\n";
+	}
+
+	OPEN_ARRAY( WIDE_ALPH_TYPE(), K() );
+	KEYS();
+	CLOSE_ARRAY() <<
+	"\n";
+
+	OPEN_ARRAY( ARRAY_TYPE(maxSpan), SP() );
+	KEY_SPANS();
+	CLOSE_ARRAY() <<
+	"\n";
+
+	OPEN_ARRAY( ARRAY_TYPE(maxFlatIndexOffset), IO() );
+	FLAT_INDEX_OFFSET();
+	CLOSE_ARRAY() <<
+	"\n";
+
+	OPEN_ARRAY( ARRAY_TYPE(maxIndex), I() );
+	INDICIES();
+	CLOSE_ARRAY() <<
+	"\n";
+
+	OPEN_ARRAY( ARRAY_TYPE(maxState), TT() );
+	TRANS_TARGS();
+	CLOSE_ARRAY() <<
+	"\n";
+
+	if ( anyActions() ) {
+		OPEN_ARRAY( ARRAY_TYPE(maxActListId), TA() );
+		TRANS_ACTIONS();
+		CLOSE_ARRAY() <<
+		"\n";
+	}
+
+	if ( anyToStateActions() ) {
+		/* NOTE(review): the entries written here are actListId+1, yet the
+		 * element type is sized by maxActionLoc while TA and EA use
+		 * maxActListId -- confirm which bound is intended. */
+		OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), TSA() );
+		TO_STATE_ACTIONS();
+		CLOSE_ARRAY() <<
+		"\n";
+	}
+
+	if ( anyFromStateActions() ) {
+		OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), FSA() ); /* NOTE(review): same sizing question as TSA */
+		FROM_STATE_ACTIONS();
+		CLOSE_ARRAY() <<
+		"\n";
+	}
+
+	if ( anyEofActions() ) {
+		OPEN_ARRAY( ARRAY_TYPE(maxActListId), EA() );
+		EOF_ACTIONS();
+		CLOSE_ARRAY() <<
+		"\n";
+	}
+
+	out <<
+		"static const int " << START() << " = " << START_STATE_ID() << ";\n"
+		"\n";
+
+	if ( cgd->writeFirstFinal ) {
+		out <<
+			"static const int " << FIRST_FINAL() << " = " << FIRST_FINAL_STATE() << ";\n"
+			"\n";
+	}
+
+	if ( cgd->writeErr ) {
+		out <<
+			"static const int " << ERROR() << " = " << ERROR_STATE() << ";\n"
+			"\n";
+	}
+}
+/* Write the block that runs the machine: local declarations, the _resume
+ * label, the optional error-state and from-state checks, the transition
+ * lookup, the transition action switch, the _again label, the to-state
+ * action switch and the advance of the input pointer. The _out label is only
+ * written when something emitted a jump to it (outLabelUsed). */
+void FFlatCodeGen::writeOutExec()
+{
+	outLabelUsed = false;
+
+	out << 
+		" {\n"
+		" int _slen";
+
+	if ( anyRegCurStateRef() )
+		out << ", _ps";
+
+	out << ";\n";
+	out << " int _trans";
+
+	if ( anyConditions() )
+		out << ", _cond";
+
+	out << ";\n";
+
+	out <<
+		" " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_keys;\n"
+		" " << PTR_CONST() << ARRAY_TYPE(maxIndex) << POINTER() << "_inds;\n";
+
+	if ( anyConditions() ) {
+		out << 
+			" " << PTR_CONST() << ARRAY_TYPE(maxCond) << POINTER() << "_conds;\n"
+			" " << WIDE_ALPH_TYPE() << " _widec;\n";
+	}
+
+	if ( cgd->hasEnd ) {
+		outLabelUsed = true;
+		out << 
+			" if ( " << P() << " == " << PE() << " )\n"
+			" goto _out;\n";
+	}
+
+	out << "_resume:\n";
+
+	if ( redFsm->errState != 0 ) {
+		outLabelUsed = true;
+		out << 
+			" if ( " << CS() << " == " << redFsm->errState->id << " )\n"
+			" goto _out;\n";
+	}
+
+	if ( anyFromStateActions() ) {
+		out <<
+			" switch ( " << FSA() << "[" << CS() << "] ) {\n";
+			FROM_STATE_ACTION_SWITCH();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			"\n";
+	}
+
+	if ( anyConditions() )
+		COND_TRANSLATE();
+
+	LOCATE_TRANS();
+
+	if ( anyRegCurStateRef() )
+		out << " _ps = " << CS() << ";\n";
+
+	out << 
+		" " << CS() << " = " << TT() << "[_trans];\n\n";
+
+	if ( anyRegActions() ) {
+		out << 
+			" if ( " << TA() << "[_trans] == 0 )\n"
+			" goto _again;\n"
+			"\n"
+			" switch ( " << TA() << "[_trans] ) {\n";
+			ACTION_SWITCH();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			"\n";
+	}
+
+	if ( anyRegActions() || anyActionGotos() || anyActionCalls() || anyActionRets() )
+		out << "_again:\n";
+
+	if ( anyToStateActions() ) {
+		out <<
+			" switch ( " << TSA() << "[" << CS() << "] ) {\n";
+			TO_STATE_ACTION_SWITCH();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			"\n";
+	}
+
+	if ( cgd->hasEnd ) {
+		out << 
+			" if ( ++" << P() << " != " << PE() << " )\n"
+			" goto _resume;\n";
+	}
+	else {
+		out << 
+			" " << P() << " += 1;\n"
+			" goto _resume;\n";
+	}
+
+	if ( outLabelUsed )
+		out << " _out: {}\n";
+
+	out << " }\n";
+}
+/* Write the block that switches on the EOF action entry of the current state.
+ * Nothing is written when the machine has no EOF actions. */
+void FFlatCodeGen::writeOutEOF()
+{
+	if ( anyEofActions() ) {
+		out <<
+			" {\n"
+			" switch ( " << EA() << "[" << CS() << "] ) {\n";
+			EOF_ACTION_SWITCH();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			" }\n"
+			"\n";
+	}
+}
diff --git a/rlcodegen/fflatcodegen.h b/rlcodegen/fflatcodegen.h
new file mode 100644
index 0000000..7cfbd66
--- /dev/null
+++ b/rlcodegen/fflatcodegen.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2004-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ *           2004 Eric Ocean <eric.ocean@ampede.com>
+ *           2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _FFLATCODEGEN_H
+#define _FFLATCODEGEN_H
+
+#include <iostream>
+#include "flatcodegen.h"
+
+/* Forwards. 
*/
+struct CodeGenData;
+
+/*
+ * FFlatCodeGen
+ *
+ * Variant of FlatCodeGen whose action tables hold action list ids
+ * (actListId+1, zero meaning no action) instead of the location+1 values
+ * written by FlatCodeGen. The generated switches therefore run a whole
+ * action list per case.
+ *
+ * The four *_SWITCH members are not virtual in FlatCodeGen; the versions
+ * declared here are the ones called by this class's writeOutExec and
+ * writeOutEOF. The per-entry writers (TO_STATE_ACTION and friends) are
+ * virtual and are reached through the table writers inherited from
+ * FlatCodeGen.
+ */
+class FFlatCodeGen : public FlatCodeGen
+{
+protected:
+	std::ostream &TO_STATE_ACTION_SWITCH();
+	std::ostream &FROM_STATE_ACTION_SWITCH();
+	std::ostream &EOF_ACTION_SWITCH();
+	std::ostream &ACTION_SWITCH();
+
+	virtual std::ostream &TO_STATE_ACTION( RedStateAp *state );
+	virtual std::ostream &FROM_STATE_ACTION( RedStateAp *state );
+	virtual std::ostream &EOF_ACTION( RedStateAp *state );
+	virtual std::ostream &TRANS_ACTION( RedTransAp *trans );
+
+	virtual void writeOutData();
+	virtual void writeOutEOF();
+	virtual void writeOutExec();
+};
+
+/*
+ * CFFlatCodeGen
+ *
+ * FFlatCodeGen combined with CCodeGen (presumably the C output flavour --
+ * TODO confirm where CCodeGen is declared). Adds no members of its own.
+ */
+struct CFFlatCodeGen
+	: public FFlatCodeGen, public CCodeGen
+{
+};
+
+/*
+ * DFFlatCodeGen
+ *
+ * FFlatCodeGen combined with DCodeGen (presumably the D output flavour --
+ * TODO confirm where DCodeGen is declared). Adds no members of its own.
+ */
+struct DFFlatCodeGen
+	: public FFlatCodeGen, public DCodeGen
+{
+};
+
+#endif /* _FFLATCODEGEN_H */
diff --git a/rlcodegen/fgotocodegen.cpp b/rlcodegen/fgotocodegen.cpp
new file mode 100644
index 0000000..34ef47c
--- /dev/null
+++ b/rlcodegen/fgotocodegen.cpp
@@ -0,0 +1,276 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ *           2004 Eric Ocean <eric.ocean@ampede.com>
+ *           2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlcodegen.h"
+#include "fgotocodegen.h"
+#include "redfsm.h"
+#include "gendata.h"
+#include "bstmap.h"
+/* Write one labelled code section per action list that is referenced by a
+ * transition. The label is f followed by the list id; the section runs every
+ * action of the list and then jumps to _again. */
+std::ostream &FGotoCodeGen::EXEC_ACTIONS()
+{
+	/* Loop over the action lists in the action map. */
+	for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+		if ( redAct->numTransRefs > 0 ) {
+			/* Write the label that starts the code for this action list. */
+			out << "f" << redAct->actListId << ":\n";
+
+			/* Write each action in the list of action items. */
+			for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+				ACTION( out, item->value, 0, false );
+
+			out << "\tgoto _again;\n";
+		}
+	}
+	return out;
+}
+
+/* Write the switch cases for to-state actions. One case is written per action
+ * list with to-state references; its label is the list id plus one, the same
+ * value TO_STATE_ACTION returns. */
+std::ostream &FGotoCodeGen::TO_STATE_ACTION_SWITCH()
+{
+	/* Loop over the action lists in the action map. */
+	for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+		if ( redAct->numToStateRefs > 0 ) {
+			/* Write the entry label. */
+			out << "\tcase " << redAct->actListId+1 << ":\n";
+
+			/* Write each action in the list of action items. */
+			for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+				ACTION( out, item->value, 0, false );
+
+			out << "\tbreak;\n";
+		}
+	}
+
+	genLineDirective( out );
+	return out;
+}
+
+/* Write the switch cases for from-state actions. One case is written per
+ * action list with from-state references; its label is the list id plus one,
+ * the same value FROM_STATE_ACTION returns. */
+std::ostream &FGotoCodeGen::FROM_STATE_ACTION_SWITCH()
+{
+	/* Loop over the action lists in the action map. */
+	for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+		if ( redAct->numFromStateRefs > 0 ) {
+			/* Write the entry label. */
+			out << "\tcase " << redAct->actListId+1 << ":\n";
+
+			/* Write each action in the list of action items. */
+			for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+				ACTION( out, item->value, 0, false );
+
+			out << "\tbreak;\n";
+		}
+	}
+
+	genLineDirective( out );
+	return out;
+}
+/* Write the switch cases for EOF actions. Same layout as the other switches,
+ * but ACTION is called with its last argument set to true (presumably the
+ * inFinish flag -- TODO confirm against the ACTION declaration). */
+std::ostream &FGotoCodeGen::EOF_ACTION_SWITCH()
+{
+	/* Loop over the action lists in the action map. */
+	for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+		if ( redAct->numEofRefs > 0 ) {
+			/* Write the entry label. */
+			out << "\tcase " << redAct->actListId+1 << ":\n";
+
+			/* Write each action in the list of action items. */
+			for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+				ACTION( out, item->value, 0, true );
+
+			out << "\tbreak;\n";
+		}
+	}
+
+	genLineDirective( out );
+	return out;
+}
+
+/* Write one case per state that has an EOF action, each jumping to the f
+ * label of that action list. NOTE(review): EXEC_ACTIONS only writes f labels
+ * for lists with transition references, and nothing in this file calls
+ * FINISH_CASES -- confirm the jump targets exist before using it. */
+std::ostream &FGotoCodeGen::FINISH_CASES()
+{
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		/* Every state that has an EOF action needs a case. */
+		if ( st->eofAction != 0 ) {
+			/* Write the case label. */
+			out << "\t\tcase " << st->id << ": ";
+
+			/* Jump to the label of the EOF action list. */
+			out << "goto f" << st->eofAction->actListId << ";\n";
+		}
+	}
+	
+	return out;
+}
+/* Return the to-state action table entry for a state: the id of its action
+ * list plus one, or zero when the state has no to-state action. */
+unsigned int FGotoCodeGen::TO_STATE_ACTION( RedStateAp *state )
+{
+	int act = 0;
+	if ( state->toStateAction != 0 )
+		act = state->toStateAction->actListId+1;
+	return act;
+}
+/* Return the from-state action table entry for a state: the id of its action
+ * list plus one, or zero when the state has no from-state action. */
+unsigned int FGotoCodeGen::FROM_STATE_ACTION( RedStateAp *state )
+{
+	int act = 0;
+	if ( state->fromStateAction != 0 )
+		act = state->fromStateAction->actListId+1;
+	return act;
+}
+/* Return the EOF action table entry for a state: the id of its action list
+ * plus one, or zero when the state has no EOF action. */
+unsigned int FGotoCodeGen::EOF_ACTION( RedStateAp *state )
+{
+	int act = 0;
+	if ( state->eofAction != 0 )
+		act = state->eofAction->actListId+1;
+	return act;
+}
+/* Write the static data of the machine: the start, first-final and error
+ * state constants, followed by the to-state, from-state and EOF action
+ * tables that the machine needs. */
+void FGotoCodeGen::writeOutData()
+{
+	out <<
+		"static const int " << START() << " = " << START_STATE_ID() << ";\n"
+		"\n";
+
+	if ( cgd->writeFirstFinal ) {
+		out <<
+			"static const int " << FIRST_FINAL() << " = " << FIRST_FINAL_STATE() << ";\n"
+			"\n";
+	}
+
+	if ( cgd->writeErr ) {
+		out <<
+			"static const int " << ERROR() << " = " << ERROR_STATE() << ";\n"
+			"\n";
+	}
+
+	if ( anyToStateActions() ) {
+		OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), TSA() );
+		TO_STATE_ACTIONS();
+		CLOSE_ARRAY() <<
+		"\n";
+	}
+
+	if ( anyFromStateActions() ) {
+		OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), FSA() );
+		FROM_STATE_ACTIONS();
+		CLOSE_ARRAY() <<
+		"\n";
+	}
+
+	if ( anyEofActions() ) {
+		OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), EA() );
+		EOF_ACTIONS();
+		CLOSE_ARRAY() <<
+		"\n";
+	}
+}
+/* Write the block that runs the machine: local declarations, the _resume
+ * label, the from-state action switch, the switch over the current state
+ * (STATE_GOTOS) with the transition code (TRANSITIONS), the labelled action
+ * sections (EXEC_ACTIONS), the _again label, the to-state action switch and
+ * the advance of the input pointer. The _out label is only written when
+ * something emitted a jump to it (outLabelUsed). */
+void FGotoCodeGen::writeOutExec()
+{
+	outLabelUsed = false;
+
+	out << " {\n";
+
+	if ( anyRegCurStateRef() )
+		out << " int _ps = 0;\n";
+
+	if ( anyConditions() )
+		out << " " << WIDE_ALPH_TYPE() << " _widec;\n";
+
+	if ( cgd->hasEnd ) {
+		outLabelUsed = true;
+		out << 
+			" if ( " << P() << " == " << PE() << " )\n"
+			" goto _out;\n";
+	}
+
+	out << "_resume:\n";
+
+	if ( anyFromStateActions() ) {
+		out <<
+			" switch ( " << FSA() << "[" << CS() << "] ) {\n";
+			FROM_STATE_ACTION_SWITCH();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			"\n";
+	}
+
+	out << 
+		" switch ( " << CS() << " ) {\n";
+		STATE_GOTOS();
+		SWITCH_DEFAULT() <<
+		" }\n"
+		"\n";
+		TRANSITIONS() << 
+		"\n";
+
+	if ( anyRegActions() )
+		EXEC_ACTIONS() << "\n";
+
+	out << "_again:\n";
+
+	if ( anyToStateActions() ) {
+		out <<
+			" switch ( " << TSA() << "[" << CS() << "] ) {\n";
+			TO_STATE_ACTION_SWITCH();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			"\n";
+	}
+
+	if ( cgd->hasEnd ) {
+		out << 
+			" if ( ++" << P() << " != " << PE() << " )\n"
+			" goto _resume;\n";
+	}
+	else {
+		out << 
+			" " << P() << " += 1;\n"
+			" goto _resume;\n";
+	}
+
+
+	if ( outLabelUsed )
+		out << " _out: {}\n";
+
+	out << " }\n";
+}
+/* Write the block that switches on the EOF action entry of the current state.
+ * Nothing is written when the machine has no EOF actions. */
+void FGotoCodeGen::writeOutEOF()
+{
+	if ( anyEofActions() ) {
+		out <<
+			" {\n"
+			" switch ( " << EA() << "[" << CS() << "] ) {\n";
+			EOF_ACTION_SWITCH();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			" }\n"
+			"\n";
+	}
+}
diff --git a/rlcodegen/fgotocodegen.h b/rlcodegen/fgotocodegen.h
new file mode 100644
index 0000000..e971877
--- /dev/null
+++ b/rlcodegen/fgotocodegen.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ *           2004 Eric Ocean <eric.ocean@ampede.com>
+ *           2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _FGOTOCODEGEN_H
+#define _FGOTOCODEGEN_H
+
+#include <iostream>
+#include "gotocodegen.h"
+
+/* Forwards. 
*/
+struct CodeGenData;
+
+
+/*
+ * class FGotoCodeGen
+ *
+ * Variant of GotoCodeGen in which state and transition actions are
+ * identified by action list id. Transition action lists are written as
+ * labelled sections (EXEC_ACTIONS); to-state, from-state and EOF action
+ * lists are written as switch cases keyed on the list id plus one.
+ */
+class FGotoCodeGen : public GotoCodeGen
+{
+public:
+	std::ostream &EXEC_ACTIONS();
+	std::ostream &TO_STATE_ACTION_SWITCH();
+	std::ostream &FROM_STATE_ACTION_SWITCH();
+	std::ostream &FINISH_CASES();
+	std::ostream &EOF_ACTION_SWITCH();
+	unsigned int TO_STATE_ACTION( RedStateAp *state );
+	unsigned int FROM_STATE_ACTION( RedStateAp *state );
+	unsigned int EOF_ACTION( RedStateAp *state );
+
+	virtual void writeOutData();
+	virtual void writeOutEOF();
+	virtual void writeOutExec();
+};
+
+/*
+ * class CFGotoCodeGen
+ *
+ * FGotoCodeGen combined with CCodeGen (presumably the C output flavour --
+ * TODO confirm where CCodeGen is declared). Adds no members of its own.
+ */
+struct CFGotoCodeGen
+	: public FGotoCodeGen, public CCodeGen
+{
+};
+
+/*
+ * class DFGotoCodeGen
+ *
+ * FGotoCodeGen combined with DCodeGen (presumably the D output flavour --
+ * TODO confirm where DCodeGen is declared). Adds no members of its own.
+ */
+struct DFGotoCodeGen
+	: public FGotoCodeGen, public DCodeGen
+{
+};
+
+#endif /* _FGOTOCODEGEN_H */
diff --git a/rlcodegen/flatcodegen.cpp b/rlcodegen/flatcodegen.cpp
new file mode 100644
index 0000000..d5e96d3
--- /dev/null
+++ b/rlcodegen/flatcodegen.cpp
@@ -0,0 +1,777 @@
+/*
+ * Copyright 2004-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ *           2004 Eric Ocean <eric.ocean@ampede.com>
+ *           2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlcodegen.h"
+#include "flatcodegen.h"
+#include "redfsm.h"
+#include "gendata.h"
+/* Write the to-state action table entry for a state: the location of its
+ * action list plus one, or zero when the state has no to-state action. */
+std::ostream &FlatCodeGen::TO_STATE_ACTION( RedStateAp *state )
+{
+	int act = 0;
+	if ( state->toStateAction != 0 )
+		act = state->toStateAction->location+1;
+	out << act;
+	return out;
+}
+/* Write the from-state action table entry for a state: the location of its
+ * action list plus one, or zero when the state has no from-state action. */
+std::ostream &FlatCodeGen::FROM_STATE_ACTION( RedStateAp *state )
+{
+	int act = 0;
+	if ( state->fromStateAction != 0 )
+		act = state->fromStateAction->location+1;
+	out << act;
+	return out;
+}
+/* Write the EOF action table entry for a state: the location of its action
+ * list plus one, or zero when the state has no EOF action. */
+std::ostream &FlatCodeGen::EOF_ACTION( RedStateAp *state )
+{
+	int act = 0;
+	if ( state->eofAction != 0 )
+		act = state->eofAction->location+1;
+	out << act;
+	return out;
+}
+/* Write the action table entry for a transition: the location of its action
+ * list plus one, or zero when the transition has no action. */
+std::ostream &FlatCodeGen::TRANS_ACTION( RedTransAp *trans )
+{
+	/* Zero is written when the transition has no action list. */
+	int act = 0;
+	if ( trans->action != 0 )
+		act = trans->action->location+1;
+	out << act;
+	return out;
+}
+/* Write one switch case per action that has to-state references. The case
+ * label is the action id. */
+std::ostream &FlatCodeGen::TO_STATE_ACTION_SWITCH()
+{
+	/* Walk the list of actions, printing the cases. */
+	for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) {
+		/* Write out referenced actions. */
+		if ( act->numToStateRefs > 0 ) {
+			/* Write the case label, the action and the case break */
+			out << "\tcase " << act->actionId << ":\n";
+			ACTION( out, act, 0, false );
+			out << "\tbreak;\n";
+		}
+	}
+
+	genLineDirective( out );
+	return out;
+}
+/* Write one switch case per action that has from-state references. The case
+ * label is the action id. */
+std::ostream &FlatCodeGen::FROM_STATE_ACTION_SWITCH()
+{
+	/* Walk the list of actions, printing the cases. */
+	for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) {
+		/* Write out referenced actions. */
+		if ( act->numFromStateRefs > 0 ) {
+			/* Write the case label, the action and the case break */
+			out << "\tcase " << act->actionId << ":\n";
+			ACTION( out, act, 0, false );
+			out << "\tbreak;\n";
+		}
+	}
+
+	genLineDirective( out );
+	return out;
+}
+/* Write one switch case per action that has EOF references. ACTION is called
+ * with its last argument set to true here (presumably the inFinish flag --
+ * TODO confirm against the ACTION declaration). */
+std::ostream &FlatCodeGen::EOF_ACTION_SWITCH()
+{
+	/* Walk the list of actions, printing the cases. */
+	for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) {
+		/* Write out referenced actions. */
+		if ( act->numEofRefs > 0 ) {
+			/* Write the case label, the action and the case break */
+			out << "\tcase " << act->actionId << ":\n";
+			ACTION( out, act, 0, true );
+			out << "\tbreak;\n";
+		}
+	}
+
+	genLineDirective( out );
+	return out;
+}
+
+/* Write one switch case per action that has transition references. The case
+ * label is the action id. */
+std::ostream &FlatCodeGen::ACTION_SWITCH()
+{
+	/* Walk the list of actions, printing the cases. */
+	for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) {
+		/* Write out referenced actions. */
+		if ( act->numTransRefs > 0 ) {
+			/* Write the case label, the action and the case break */
+			out << "\tcase " << act->actionId << ":\n";
+			ACTION( out, act, 0, false );
+			out << "\tbreak;\n";
+		}
+	}
+
+	genLineDirective( out );
+	return out;
+}
+
+/* Write, for every state, the offset of its first entry in the indicies
+ * table. A state takes one slot per key in its span plus one more slot when
+ * it has a default transition, which matches what INDICIES writes. */
+std::ostream &FlatCodeGen::FLAT_INDEX_OFFSET()
+{
+	out << "\t";
+	int totalStateNum = 0, curIndOffset = 0;
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		/* Write the index offset. */
+		out << curIndOffset;
+		if ( !st.last() ) {
+			out << ", ";
+			if ( ++totalStateNum % IALL == 0 )
+				out << "\n\t";
+		}
+		
+		/* Move the index offset ahead. */
+		if ( st->transList != 0 )
+			curIndOffset += keyOps->span( st->lowKey, st->highKey );
+
+		if ( st->defTrans != 0 )
+			curIndOffset += 1;
+	}
+	out << "\n";
+	return out;
+}
+/* Write the key span of every state, wrapping the line after every IALL
+ * entries. */
+std::ostream &FlatCodeGen::KEY_SPANS()
+{
+	out << "\t";
+	int totalStateNum = 0;
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		/* Write the key span, zero when the state has no transition list. */
+		unsigned long long span = 0;
+		if ( st->transList != 0 )
+			span = keyOps->span( st->lowKey, st->highKey );
+		out << span;
+		if ( !st.last() ) {
+			out << ", ";
+			if ( ++totalStateNum % IALL == 0 )
+				out << "\n\t";
+		}
+	}
+	out << "\n";
+	return out;
+}
+/* Write the to-state action entry of every state, via TO_STATE_ACTION. */
+std::ostream &FlatCodeGen::TO_STATE_ACTIONS()
+{
+	out << "\t";
+	int totalStateNum = 0;
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		/* Write any to-state action. */
+		TO_STATE_ACTION(st);
+		if ( !st.last() ) {
+			out << ", ";
+			if ( ++totalStateNum % IALL == 0 )
+				out << "\n\t";
+		}
+	}
+	out << "\n";
+	return out;
+}
+/* Write the from-state action entry of every state, via FROM_STATE_ACTION. */
+std::ostream &FlatCodeGen::FROM_STATE_ACTIONS()
+{
+	out << "\t";
+	int totalStateNum = 0;
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		/* Write any from-state action. */
+		FROM_STATE_ACTION(st);
+		if ( !st.last() ) {
+			out << ", ";
+			if ( ++totalStateNum % IALL == 0 )
+				out << "\n\t";
+		}
+	}
+	out << "\n";
+	return out;
+}
+/* Write the EOF action entry of every state, via EOF_ACTION. */
+std::ostream &FlatCodeGen::EOF_ACTIONS()
+{
+	out << "\t";
+	int totalStateNum = 0;
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		/* Write any eof action. */
+		EOF_ACTION(st);
+		if ( !st.last() ) {
+			out << ", ";
+			if ( ++totalStateNum % IALL == 0 )
+				out << "\n\t";
+		}
+	}
+	out << "\n";
+	return out;
+}
+/* Write the condition low key and high key of every state, two entries per
+ * state, followed by a single terminating zero. */
+std::ostream &FlatCodeGen::COND_KEYS()
+{
+	out << '\t';
+	int totalTrans = 0;
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		/* Emit just cond low key and cond high key. */
+		out << KEY( st->condLowKey ) << ", ";
+		out << KEY( st->condHighKey ) << ", ";
+		if ( ++totalTrans % IALL == 0 )
+			out << "\n\t";
+	}
+
+	/* Output one last number so we don't have to figure out when the last
+	 * entry is and avoid writing a comma. */
+	out << 0 << "\n";
+	return out;
+}
+/* Write the condition key span of every state, wrapping the line after every
+ * IALL entries. */
+std::ostream &FlatCodeGen::COND_KEY_SPANS()
+{
+	out << "\t";
+	int totalStateNum = 0;
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		/* Write the condition span, zero when the state has no cond list. */
+		unsigned long long span = 0;
+		if ( st->condList != 0 )
+			span = keyOps->span( st->condLowKey, st->condHighKey );
+		out << span;
+		if ( !st.last() ) {
+			out << ", ";
+			if ( ++totalStateNum % IALL == 0 )
+				out << "\n\t";
+		}
+	}
+	out << "\n";
+	return out;
+}
+/* Write, for every key position in the condition span of every state, the id
+ * of the condition space plus one, or zero when the position has none. A
+ * single terminating zero follows the last entry. */
+std::ostream &FlatCodeGen::CONDS()
+{
+	int totalTrans = 0;
+	out << '\t';
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		if ( st->condList != 0 ) {
+			/* Walk every key position in the condition span. */
+			unsigned long long span = keyOps->span( st->condLowKey, st->condHighKey );
+			for ( unsigned long long pos = 0; pos < span; pos++ ) {
+				if ( st->condList[pos] != 0 )
+					out << st->condList[pos]->condSpaceId + 1 << ", ";
+				else
+					out << "0, ";
+				if ( ++totalTrans % IALL == 0 )
+					out << "\n\t";
+			}
+		}
+	}
+
+	/* Output one last number so we don't have to figure out when the last
+	 * entry is and avoid writing a comma. */
+	out << 0 << "\n";
+	return out;
+}
+/* Write, for every state, the offset of its first entry in the table written
+ * by CONDS. A state takes one slot per key in its condition span. */
+std::ostream &FlatCodeGen::COND_INDEX_OFFSET()
+{
+	out << "\t";
+	int totalStateNum = 0, curIndOffset = 0;
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		/* Write the index offset. */
+		out << curIndOffset;
+		if ( !st.last() ) {
+			out << ", ";
+			if ( ++totalStateNum % IALL == 0 )
+				out << "\n\t";
+		}
+		
+		/* Move the index offset ahead. */
+		if ( st->condList != 0 )
+			curIndOffset += keyOps->span( st->condLowKey, st->condHighKey );
+	}
+	out << "\n";
+	return out;
+}
+
+/* Write the low key and high key of every state, two entries per state,
+ * followed by a single terminating zero. */
+std::ostream &FlatCodeGen::KEYS()
+{
+	out << '\t';
+	int totalTrans = 0;
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		/* Emit just low key and high key. */
+		out << KEY( st->lowKey ) << ", ";
+		out << KEY( st->highKey ) << ", ";
+		if ( ++totalTrans % IALL == 0 )
+			out << "\n\t";
+	}
+
+	/* Output one last number so we don't have to figure out when the last
+	 * entry is and avoid writing a comma. */
+	out << 0 << "\n";
+	return out;
+}
+/* Write the transition id for every key position in the span of every state,
+ * each state followed by the id of its default transition when it has one. A
+ * single terminating zero follows the last entry. */
+std::ostream &FlatCodeGen::INDICIES()
+{
+	int totalTrans = 0;
+	out << '\t';
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		if ( st->transList != 0 ) {
+			/* Walk every key position in the span. */
+			unsigned long long span = keyOps->span( st->lowKey, st->highKey );
+			for ( unsigned long long pos = 0; pos < span; pos++ ) {
+				out << st->transList[pos]->id << ", ";
+				if ( ++totalTrans % IALL == 0 )
+					out << "\n\t";
+			}
+		}
+
+		/* The state's default index goes next. */
+		if ( st->defTrans != 0 )
+			out << st->defTrans->id << ", ";
+		/* NOTE(review): the counter below advances once per state even when
+		 * no default index was written; this only shifts where lines wrap. */
+		if ( ++totalTrans % IALL == 0 )
+			out << "\n\t";
+	}
+
+	/* Output one last number so we don't have to figure out when the last
+	 * entry is and avoid writing a comma. */
+	out << 0 << "\n";
+	return out;
+}
+/* Write the target state id of every transition, ordered by transition id. */
+std::ostream &FlatCodeGen::TRANS_TARGS()
+{
+	/* Transitions must be written ordered by their id. A temporary array
+	 * indexed by id is filled first and released before returning. */
+	RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()];
+	for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ )
+		transPtrs[trans->id] = trans;
+
+	/* Keep a count of the num of items in the array written. */
+	out << '\t';
+	int totalStates = 0;
+	for ( int t = 0; t < redFsm->transSet.length(); t++ ) {
+		/* Write out the target state. */
+		RedTransAp *trans = transPtrs[t];
+		out << trans->targ->id;
+		if ( t < redFsm->transSet.length()-1 ) {
+			out << ", ";
+			if ( ++totalStates % IALL == 0 )
+				out << "\n\t";
+		}
+	}
+	out << "\n";
+	delete[] transPtrs;
+	return out;
+}
+
+/* Write the action entry of every transition, ordered by transition id. Each
+ * entry comes from the virtual TRANS_ACTION. */
+std::ostream &FlatCodeGen::TRANS_ACTIONS()
+{
+	/* Transitions must be written ordered by their id. A temporary array
+	 * indexed by id is filled first and released before returning. */
+	RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()];
+	for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ )
+		transPtrs[trans->id] = trans;
+
+	/* Keep a count of the num of items in the array written. */
+	out << '\t';
+	int totalAct = 0;
+	for ( int t = 0; t < redFsm->transSet.length(); t++ ) {
+		/* Write the action entry for the transition. */
+		RedTransAp *trans = transPtrs[t];
+		TRANS_ACTION( trans );
+		if ( t < redFsm->transSet.length()-1 ) {
+			out << ", ";
+			if ( ++totalAct % IALL == 0 )
+				out << "\n\t";
+		}
+	}
+	out << "\n";
+	delete[] transPtrs;
+	return out;
+}
+/* Write the code that selects the transition for the current key. It points
+ * _keys at the low/high key pair of the current state and _inds at the
+ * state's slice of the indicies table, then indexes by key minus low key
+ * when the key is inside the span, or by _slen (the default slot) otherwise. */
+void FlatCodeGen::LOCATE_TRANS()
+{
+	out <<
+		" _keys = " << ARR_OFF( K(), "(" + CS() + "<<1)" ) << ";\n"
+		" _inds = " << ARR_OFF( I(), IO() + "[" + CS() + "]" ) << ";\n"
+		"\n"
+		" _slen = " << SP() << "[" << CS() << "];\n"
+		" _trans = _inds[ _slen > 0 && _keys[0] <=" << GET_WIDE_KEY() << " &&\n"
+		" " << GET_WIDE_KEY() << " <= _keys[1] ?\n"
+		" " << GET_WIDE_KEY() << " - _keys[0] : _slen ];\n"
+		"\n";
+}
+/* Write a jump to a fixed state: assign gotoDest to the CS() expression and
+ * jump to _again. inFinish is not used here. */
+void FlatCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish )
+{
+	ret << "{" << CS() << " = " << gotoDest << "; " << 
+			CTRL_FLOW() << "goto _again;}";
+}
+/* Write a jump to a computed state: assign the inline expression held in
+ * ilItem->children to the CS() expression and jump to _again. */
+void FlatCodeGen::GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+	ret << "{" << CS() << " = (";
+	INLINE_LIST( ret, ilItem->children, 0, inFinish );
+	ret << "); " << CTRL_FLOW() << "goto _again;}";
+}
+/* Write a reference to the _ps variable, which writeOutExec sets to the value
+ * of the CS() expression before the transition is taken. */
+void FlatCodeGen::CURS( ostream &ret, bool inFinish )
+{
+	ret << "(_ps)";
+}
+/* Write a parenthesised reference to the CS() expression. inFinish and
+ * targState are not used here. */
+void FlatCodeGen::TARGS( ostream &ret, bool inFinish, int targState )
+{
+	ret << "(" << CS() << ")";
+}
+/* Write an assignment of a fixed state to the CS() expression, with no jump. */
+void FlatCodeGen::NEXT( ostream &ret, int nextDest, bool inFinish )
+{
+	ret << CS() << " = " << nextDest << ";";
+}
+/* Write an assignment of the inline expression held in ilItem->children to
+ * the CS() expression, with no jump. */
+void FlatCodeGen::NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+	ret << CS() << " = (";
+	INLINE_LIST( ret, ilItem->children, 0, inFinish );
+	ret << ");";
+}
+/* Write a call to a fixed state: store the CS() value at STACK()[TOP()++],
+ * assign callDest to CS() and jump to _again. */
+void FlatCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish )
+{
+	ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; " << CS() << " = " << 
+			callDest << "; " << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write a call to a computed state: store the CS() value at STACK()[TOP()++],
+ * assign the inline expression to CS() and jump to _again. */
+void FlatCodeGen::CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish )
+{
+	ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; " << CS() << " = (";
+	INLINE_LIST( ret, ilItem->children, targState, inFinish );
+	ret << "); " << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write a return: load CS() from STACK()[--TOP()] and jump to _again. */
+void FlatCodeGen::RET( ostream &ret, bool inFinish )
+{
+	ret << "{" << CS() << " = " << STACK() << "[--" << TOP() << "]; " << 
+			CTRL_FLOW() << "goto _again;}";
+}
+/* Write a jump to _out and record that the _out label must be written. */
+void FlatCodeGen::BREAK( ostream &ret, int targState )
+{
+	outLabelUsed = true;
+	ret << CTRL_FLOW() << "goto _out;";
+}
+/* Write the static data of the machine: the actions array, the condition
+ * tables (only when the machine has conditions), keys, key spans, index
+ * offsets, indicies and transition targets, then whichever action tables are
+ * needed, and finally the start, first-final and error state constants. */
+void FlatCodeGen::writeOutData()
+{
+	/* If there are any transition actions then output the array. If there
+	 * are none, don't bother emitting an empty array that won't be used. */
+	if ( anyActions() ) {
+		OPEN_ARRAY( ARRAY_TYPE(maxActArrItem), A() );
+		ACTIONS_ARRAY();
+		CLOSE_ARRAY() <<
+		"\n";
+	}
+
+	if ( anyConditions() ) {
+		OPEN_ARRAY( WIDE_ALPH_TYPE(), CK() );
+		COND_KEYS();
+		CLOSE_ARRAY() <<
+		"\n";
+
+		OPEN_ARRAY( ARRAY_TYPE(maxCondSpan), CSP() );
+		COND_KEY_SPANS();
+		CLOSE_ARRAY() <<
+		"\n";
+
+		OPEN_ARRAY( ARRAY_TYPE(maxCond), C() );
+		CONDS();
+		CLOSE_ARRAY() <<
+		"\n";
+
+		OPEN_ARRAY( ARRAY_TYPE(maxCondIndexOffset), CO() );
+		COND_INDEX_OFFSET();
+		CLOSE_ARRAY() <<
+		"\n";
+	}
+
+	OPEN_ARRAY( WIDE_ALPH_TYPE(), K() );
+	KEYS();
+	CLOSE_ARRAY() <<
+	"\n";
+
+	OPEN_ARRAY( ARRAY_TYPE(maxSpan), SP() );
+	KEY_SPANS();
+	CLOSE_ARRAY() <<
+	"\n";
+
+	OPEN_ARRAY( ARRAY_TYPE(maxFlatIndexOffset), IO() );
+	FLAT_INDEX_OFFSET();
+	CLOSE_ARRAY() <<
+	"\n";
+
+	OPEN_ARRAY( ARRAY_TYPE(maxIndex), I() );
+	INDICIES();
+	CLOSE_ARRAY() <<
+	"\n";
+
+	OPEN_ARRAY( ARRAY_TYPE(maxState), TT() );
+	TRANS_TARGS();
+	CLOSE_ARRAY() <<
+	"\n";
+
+	if ( anyActions() ) {
+		OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), TA() );
+		TRANS_ACTIONS();
+		CLOSE_ARRAY() <<
+		"\n";
+	}
+
+	if ( anyToStateActions() ) {
+		OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), TSA() );
+		TO_STATE_ACTIONS();
+		CLOSE_ARRAY() <<
+		"\n";
+	}
+
+	if ( anyFromStateActions() ) {
+		OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), FSA() );
+		FROM_STATE_ACTIONS();
+		CLOSE_ARRAY() <<
+		"\n";
+	}
+
+	if ( anyEofActions() ) {
+		OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), EA() );
+		EOF_ACTIONS();
+		CLOSE_ARRAY() <<
+		"\n";
+	}
+
+	out <<
+		"static const int " << START() << " = " << START_STATE_ID() << ";\n"
+		"\n";
+
+	if ( cgd->writeFirstFinal ) {
+		out <<
+			"static const int " << FIRST_FINAL() << " = " << FIRST_FINAL_STATE() << ";\n"
+			"\n";
+	}
+
+	if ( cgd->writeErr ) {
+		out <<
+			"static const int " << ERROR() << " = " << ERROR_STATE() << ";\n"
+			"\n";
+	}
+}
+/* Write the code that computes _widec for the current key. It looks up the
+ * condition space entry for the key (zero when the key is outside the
+ * condition span) and switches on it; each case rebases the key onto the base
+ * key of the condition space and adds an offset for every condition that
+ * holds. NOTE(review): the inner loop iterator is also named csi and shadows
+ * the outer one inside that loop. */
+void FlatCodeGen::COND_TRANSLATE()
+{
+	out << 
+		" _widec = " << GET_KEY() << ";\n";
+
+	out <<
+		" _keys = " << ARR_OFF( CK(), "(" + CS() + "<<1)" ) << ";\n"
+		" _conds = " << ARR_OFF( C(), CO() + "[" + CS() + "]" ) << ";\n"
+		"\n"
+		" _slen = " << CSP() << "[" << CS() << "];\n"
+		" _cond = _slen > 0 && _keys[0] <=" << GET_WIDE_KEY() << " &&\n"
+		" " << GET_WIDE_KEY() << " <= _keys[1] ?\n"
+		" _conds[" << GET_WIDE_KEY() << " - _keys[0]] : 0;\n"
+		"\n";
+
+	out <<
+		" switch ( _cond ) {\n";
+	for ( CondSpaceList::Iter csi = cgd->condSpaceList; csi.lte(); csi++ ) {
+		CondSpace *condSpace = csi;
+		out << " case " << condSpace->condSpaceId + 1 << ": {\n";
+		out << TABS(2) << "_widec = " << CAST(WIDE_ALPH_TYPE()) << "(" <<
+				KEY(condSpace->baseKey) << " + (" << GET_KEY() << 
+				" - " << KEY(keyOps->minKey) << "));\n";
+
+		for ( CondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) {
+			out << TABS(2) << "if ( ";
+			CONDITION( out, *csi );
+			Size condValOffset = ((1 << csi.pos()) * keyOps->alphSize());
+			out << " ) _widec += " << condValOffset << ";\n";
+		}
+
+		out << " }\n";
+		out << " break;\n";
+	}
+
+	SWITCH_DEFAULT();
+
+	out <<
+		" }\n";
+}
+/* Write the block that runs the machine: local declarations, the _resume
+ * label, the optional error-state check, the from-state action loop, the
+ * transition lookup, the transition action loop, the _again label, the
+ * to-state action loop and the advance of the input pointer. Each action loop
+ * reads a count from the actions array and then switches on that many action
+ * ids. The _out label is only written when something emitted a jump to it. */
+void FlatCodeGen::writeOutExec()
+{
+	outLabelUsed = false;
+
+	out << 
+		" {\n"
+		" int _slen";
+
+	if ( anyRegCurStateRef() )
+		out << ", _ps";
+
+	out << 
+		";\n"
+		" int _trans";
+
+	if ( anyConditions() )
+		out << ", _cond";
+	out << ";\n";
+
+	if ( anyToStateActions() || anyRegActions() || anyFromStateActions() ) {
+		out << 
+			" " << PTR_CONST() << ARRAY_TYPE(maxActArrItem) << POINTER() << "_acts;\n"
+			" " << UINT() << " _nacts;\n"; 
+	}
+
+	out <<
+		" " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_keys;\n"
+		" " << PTR_CONST() << ARRAY_TYPE(maxIndex) << POINTER() << "_inds;\n";
+
+	if ( anyConditions() ) {
+		out << 
+			" " << PTR_CONST() << ARRAY_TYPE(maxCond) << POINTER() << "_conds;\n"
+			" " << WIDE_ALPH_TYPE() << " _widec;\n";
+	}
+
+	out << "\n";
+
+	if ( cgd->hasEnd ) {
+		outLabelUsed = true;
+		out << 
+			" if ( " << P() << " == " << PE() << " )\n"
+			" goto _out;\n";
+	}
+
+	out << "_resume:\n";
+
+	if ( redFsm->errState != 0 ) {
+		outLabelUsed = true;
+		out << 
+			" if ( " << CS() << " == " << redFsm->errState->id << " )\n"
+			" goto _out;\n";
+	}
+
+	if ( anyFromStateActions() ) {
+		out <<
+			" _acts = " << ARR_OFF( A(), FSA() + "[" + CS() + "]" ) << ";\n"
+			" _nacts = " << CAST(UINT()) << " *_acts++;\n"
+			" while ( _nacts-- > 0 ) {\n"
+			" switch ( *_acts++ ) {\n";
+			FROM_STATE_ACTION_SWITCH();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			" }\n"
+			"\n";
+	}
+
+	if ( anyConditions() )
+		COND_TRANSLATE();
+
+	LOCATE_TRANS();
+
+	if ( anyRegCurStateRef() )
+		out << " _ps = " << CS() << ";\n";
+
+	out <<
+		" " << CS() << " = " << TT() << "[_trans];\n"
+		"\n";
+
+	if ( anyRegActions() ) {
+		out <<
+			" if ( " << TA() << "[_trans] == 0 )\n"
+			" goto _again;\n"
+			"\n"
+			" _acts = " << ARR_OFF( A(), TA() + "[_trans]" ) << ";\n"
+			" _nacts = " << CAST(UINT()) << " *_acts++;\n"
+			" while ( _nacts-- > 0 ) {\n"
+			" switch ( *(_acts++) )\n {\n";
+			ACTION_SWITCH();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			" }\n"
+			"\n";
+	}
+
+	if ( anyRegActions() || anyActionGotos() || anyActionCalls() || anyActionRets() )
+		out << "_again:\n";
+
+	if ( anyToStateActions() ) {
+		out <<
+			" _acts = " << ARR_OFF( A(), TSA() + "[" + CS() + "]" ) << ";\n"
+			" _nacts = " << CAST(UINT()) << " *_acts++;\n"
+			" while ( _nacts-- > 0 ) {\n"
+			" switch ( *_acts++ ) {\n";
+			TO_STATE_ACTION_SWITCH();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			" }\n"
+			"\n";
+	}
+
+	if ( cgd->hasEnd ) {
+		out << 
+			" if ( ++" << P() << " != " << PE() << " )\n"
+			" goto _resume;\n";
+	}
+	else {
+		out << 
+			" " << P() << " += 1;\n"
+			" goto _resume;\n";
+	}
+	
+	if ( outLabelUsed )
+		out << " _out: {}\n";
+
+	out << " }\n";
+}
+/* Write the block that runs the EOF actions of the current state: it reads a
+ * count from the actions array and then switches on that many action ids.
+ * Nothing is written when the machine has no EOF actions. */
+void FlatCodeGen::writeOutEOF()
+{
+	if ( anyEofActions() ) {
+		out << 
+			" {\n"
+			" " << PTR_CONST() << ARRAY_TYPE(maxActArrItem) << POINTER() << "_acts = " << 
+			ARR_OFF( A(), EA() + "[" + CS() + "]" ) << ";\n"
+			" " << UINT() << " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+			" while ( _nacts-- > 0 ) {\n"
+			" switch ( *_acts++ ) {\n";
+			EOF_ACTION_SWITCH();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			" }\n"
+			" }\n"
+			"\n";
+	}
+}
diff --git a/rlcodegen/flatcodegen.h b/rlcodegen/flatcodegen.h
new file mode 100644
index 0000000..a2ccb1d
--- /dev/null
+++ b/rlcodegen/flatcodegen.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2004-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ *           2004 Eric Ocean <eric.ocean@ampede.com>
+ *           2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _FLATCODEGEN_H
+#define _FLATCODEGEN_H
+
+#include <iostream>
+#include "fsmcodegen.h"
+
+/* Forwards. 
*/ +struct CodeGenData; +struct NameInst; +struct RedTransAp; +struct RedStateAp; + +/* + * FlatCodeGen + */ +class FlatCodeGen : virtual public FsmCodeGen +{ +public: + virtual ~FlatCodeGen() { } + +protected: + std::ostream &TO_STATE_ACTION_SWITCH(); + std::ostream &FROM_STATE_ACTION_SWITCH(); + std::ostream &EOF_ACTION_SWITCH(); + std::ostream &ACTION_SWITCH(); + std::ostream &KEYS(); + std::ostream &INDICIES(); + std::ostream &FLAT_INDEX_OFFSET(); + std::ostream &KEY_SPANS(); + std::ostream &TO_STATE_ACTIONS(); + std::ostream &FROM_STATE_ACTIONS(); + std::ostream &EOF_ACTIONS(); + std::ostream &TRANS_TARGS(); + std::ostream &TRANS_ACTIONS(); + void LOCATE_TRANS(); + + std::ostream &COND_INDEX_OFFSET(); + void COND_TRANSLATE(); + std::ostream &CONDS(); + std::ostream &COND_KEYS(); + std::ostream &COND_KEY_SPANS(); + + void GOTO( ostream &ret, int gotoDest, bool inFinish ); + void CALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NEXT( ostream &ret, int nextDest, bool inFinish ); + void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ); + void NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ); + void CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish ); + void CURS( ostream &ret, bool inFinish ); + void TARGS( ostream &ret, bool inFinish, int targState ); + void RET( ostream &ret, bool inFinish ); + void BREAK( ostream &ret, int targState ); + + virtual std::ostream &TO_STATE_ACTION( RedStateAp *state ); + virtual std::ostream &FROM_STATE_ACTION( RedStateAp *state ); + virtual std::ostream &EOF_ACTION( RedStateAp *state ); + virtual std::ostream &TRANS_ACTION( RedTransAp *trans ); + + virtual void writeOutData(); + virtual void writeOutEOF(); + virtual void writeOutExec(); +}; + +/* + * CFlatCodeGen + */ +struct CFlatCodeGen + : public FlatCodeGen, public CCodeGen +{ +}; + +/* + * DFlatCodeGen + */ +struct DFlatCodeGen + : public FlatCodeGen, public DCodeGen +{ +}; + +#endif /* _FLATCODEGEN_H */ 
diff --git a/rlcodegen/fsmcodegen.cpp b/rlcodegen/fsmcodegen.cpp new file mode 100644 index 0000000..b9aa458 --- /dev/null +++ b/rlcodegen/fsmcodegen.cpp @@ -0,0 +1,1012 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Eric Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "rlcodegen.h" +#include "fsmcodegen.h" +#include "redfsm.h" +#include "gendata.h" +#include <sstream> +#include <string> +#include <assert.h> + +using std::ostream; +using std::ostringstream; +using std::string; +using std::cerr; +using std::endl; + + +/* Determine if a string is only whitespace. Code blocks that are only + * whitespace need not be output. */ +bool onlyWhitespace( char *str ) +{ + while ( *str != 0 ) { + if ( *str != ' ' && *str != '\t' && *str != '\n' && + *str != '\v' && *str != '\f' && *str != '\r' ) + return false; + str += 1; + } + return true; +} + +/* Init code gen with in parameters. 
*/ +FsmCodeGen::FsmCodeGen( ) +: + fsmName(0), + cgd(0), + redFsm(0), + out(*outStream), + bAnyToStateActions(false), + bAnyFromStateActions(false), + bAnyRegActions(false), + bAnyEofActions(false), + bAnyActionGotos(false), + bAnyActionCalls(false), + bAnyActionRets(false), + bAnyRegActionRets(false), + bAnyRegActionByValControl(false), + bAnyRegNextStmt(false), + bAnyRegCurStateRef(false), + bAnyRegBreak(false), + bAnyLmSwitchError(false), + bAnyConditions(false) +{ +} + +/* Does the machine have any action tables? True when redFsm->actionMap is + * non-empty. */ +bool FsmCodeGen::anyActions() +{ + return redFsm->actionMap.length() > 0; +} + /* Count how often each action table, and each action listed in it, is + * referenced by the reduced machine. Transition, to-state, from-state and EOF + * references are kept in separate counters, which are only incremented here. */ +void FsmCodeGen::findFinalActionRefs() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Reference count out of single transitions. Both the table and every + * action it lists are counted. */ + for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) { + if ( rtel->value->action != 0 ) { + rtel->value->action->numTransRefs += 1; + for ( ActionTable::Iter item = rtel->value->action->key; item.lte(); item++ ) + item->value->numTransRefs += 1; + } + } + + /* Reference count out of range transitions. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + if ( rtel->value->action != 0 ) { + rtel->value->action->numTransRefs += 1; + for ( ActionTable::Iter item = rtel->value->action->key; item.lte(); item++ ) + item->value->numTransRefs += 1; + } + } + + /* Reference count default transition. */ + if ( st->defTrans != 0 && st->defTrans->action != 0 ) { + st->defTrans->action->numTransRefs += 1; + for ( ActionTable::Iter item = st->defTrans->action->key; item.lte(); item++ ) + item->value->numTransRefs += 1; + } + + /* Reference count to state actions. */ + if ( st->toStateAction != 0 ) { + st->toStateAction->numToStateRefs += 1; + for ( ActionTable::Iter item = st->toStateAction->key; item.lte(); item++ ) + item->value->numToStateRefs += 1; + } + + /* Reference count from state actions. 
*/ + if ( st->fromStateAction != 0 ) { + st->fromStateAction->numFromStateRefs += 1; + for ( ActionTable::Iter item = st->fromStateAction->key; item.lte(); item++ ) + item->value->numFromStateRefs += 1; + } + + /* Reference count EOF actions. */ + if ( st->eofAction != 0 ) { + st->eofAction->numEofRefs += 1; + for ( ActionTable::Iter item = st->eofAction->key; item.lte(); item++ ) + item->value->numEofRefs += 1; + } + } +} + +/* Assign ids to referenced actions. */ +void FsmCodeGen::assignActionIds() +{ + int nextActionId = 0; + for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) { + /* Only ever interested in referenced actions. */ + if ( act->numRefs() > 0 ) + act->actionId = nextActionId++; + } +} + +void FsmCodeGen::setValueLimits() +{ + maxSingleLen = 0; + maxRangeLen = 0; + maxKeyOffset = 0; + maxIndexOffset = 0; + maxActListId = 0; + maxActionLoc = 0; + maxActArrItem = 0; + maxSpan = 0; + maxCondSpan = 0; + maxFlatIndexOffset = 0; + maxCondOffset = 0; + maxCondLen = 0; + maxCondSpaceId = 0; + maxCondIndexOffset = 0; + + /* In both of these cases the 0 index is reserved for no value, so the max + * is one more than it would be if they started at 0. */ + maxIndex = redFsm->transSet.length(); + maxCond = cgd->condSpaceList.length(); + + /* The nextStateId - 1 is the last state id assigned. */ + maxState = redFsm->nextStateId - 1; + + for ( CondSpaceList::Iter csi = cgd->condSpaceList; csi.lte(); csi++ ) { + if ( csi->condSpaceId > maxCondSpaceId ) + maxCondSpaceId = csi->condSpaceId; + } + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Maximum cond length. */ + if ( st->stateCondList.length() > maxCondLen ) + maxCondLen = st->stateCondList.length(); + + /* Maximum single length. */ + if ( st->outSingle.length() > maxSingleLen ) + maxSingleLen = st->outSingle.length(); + + /* Maximum range length. 
*/ + if ( st->outRange.length() > maxRangeLen ) + maxRangeLen = st->outRange.length(); + + /* The key offset index offset for the state after last is not used, skip it.. */ + if ( ! st.last() ) { + maxCondOffset += st->stateCondList.length(); + maxKeyOffset += st->outSingle.length() + st->outRange.length()*2; + maxIndexOffset += st->outSingle.length() + st->outRange.length() + 1; + } + + /* Max cond span. */ + if ( st->condList != 0 ) { + unsigned long long span = keyOps->span( st->condLowKey, st->condHighKey ); + if ( span > maxCondSpan ) + maxCondSpan = span; + } + + /* Max key span. */ + if ( st->transList != 0 ) { + unsigned long long span = keyOps->span( st->lowKey, st->highKey ); + if ( span > maxSpan ) + maxSpan = span; + } + + /* Max cond index offset. */ + if ( ! st.last() ) { + if ( st->condList != 0 ) + maxCondIndexOffset += keyOps->span( st->condLowKey, st->condHighKey ); + } + + /* Max flat index offset. */ + if ( ! st.last() ) { + if ( st->transList != 0 ) + maxFlatIndexOffset += keyOps->span( st->lowKey, st->highKey ); + maxFlatIndexOffset += 1; + } + } + + for ( ActionTableMap::Iter at = redFsm->actionMap; at.lte(); at++ ) { + /* Maximum id of action lists. */ + if ( at->actListId+1 > maxActListId ) + maxActListId = at->actListId+1; + + /* Maximum location of items in action array. */ + if ( at->location+1 > maxActionLoc ) + maxActionLoc = at->location+1; + + /* Maximum values going into the action array. */ + if ( at->key.length() > maxActArrItem ) + maxActArrItem = at->key.length(); + for ( ActionTable::Iter item = at->key; item.lte(); item++ ) { + if ( item->value->actionId > maxActArrItem ) + maxActArrItem = item->value->actionId; + } + } +} + +void FsmCodeGen::analyzeAction( Action *act, InlineList *inlineList ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + /* Only consider actions that are referenced. 
*/ + if ( act->numRefs() > 0 ) { + if ( item->type == InlineItem::Goto || item->type == InlineItem::GotoExpr ) + bAnyActionGotos = true; + else if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr ) + bAnyActionCalls = true; + else if ( item->type == InlineItem::Ret ) + bAnyActionRets = true; + } + + /* Check for various things in regular actions. */ + if ( act->numTransRefs > 0 || act->numToStateRefs > 0 || act->numFromStateRefs > 0 ) { + /* Any returns in regular actions? */ + if ( item->type == InlineItem::Ret ) + bAnyRegActionRets = true; + + /* Any next statements in the regular actions? */ + if ( item->type == InlineItem::Next || item->type == InlineItem::NextExpr ) + bAnyRegNextStmt = true; + + /* Any by value control in regular actions? */ + if ( item->type == InlineItem::CallExpr || item->type == InlineItem::GotoExpr ) + bAnyRegActionByValControl = true; + + /* Any references to the current state in regular actions? */ + if ( item->type == InlineItem::Curs ) + bAnyRegCurStateRef = true; + + if ( item->type == InlineItem::Break ) + bAnyRegBreak = true; + + if ( item->type == InlineItem::LmSwitch && item->handlesError ) + bAnyLmSwitchError = true; + } + + if ( item->children != 0 ) + analyzeAction( act, item->children ); + } +} + +void FsmCodeGen::analyzeActionList( RedAction *redAct, InlineList *inlineList ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + /* Any next statements in the action table? */ + if ( item->type == InlineItem::Next || item->type == InlineItem::NextExpr ) + redAct->bAnyNextStmt = true; + + /* Any references to the current state. */ + if ( item->type == InlineItem::Curs ) + redAct->bAnyCurStateRef = true; + + if ( item->type == InlineItem::Break ) + redAct->bAnyBreakStmt = true; + + if ( item->children != 0 ) + analyzeActionList( redAct, item->children ); + } +} + +/* Gather various info on the machine. */ +void FsmCodeGen::analyzeMachine() +{ + /* Find the true count of action references. 
*/ + findFinalActionRefs(); + + /* Record which kinds of action references exist and scan the inline tree + * of every action. */ + for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) { + /* Record the occurrence of various kinds of actions. */ + if ( act->numToStateRefs > 0 ) + bAnyToStateActions = true; + if ( act->numFromStateRefs > 0 ) + bAnyFromStateActions = true; + if ( act->numEofRefs > 0 ) + bAnyEofActions = true; + if ( act->numTransRefs > 0 ) + bAnyRegActions = true; + + /* Recurse through the action's parse tree looking for various things. */ + analyzeAction( act, act->inlineList ); + } + + /* Analyze reduced action lists. */ + for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + for ( ActionTable::Iter act = redAct->key; act.lte(); act++ ) + analyzeActionList( redAct, act->value->inlineList ); + } + + /* Find states that have transitions whose actions reference the current + * state. Also note whether any state has conditions. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Check any actions out of outSingle. */ + for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) { + if ( rtel->value->action != 0 && rtel->value->action->anyCurStateRef() ) + st->bAnyRegCurStateRef = true; + } + + /* Check any actions out of outRange. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + if ( rtel->value->action != 0 && rtel->value->action->anyCurStateRef() ) + st->bAnyRegCurStateRef = true; + } + + /* Check any action out of default. */ + if ( st->defTrans != 0 && st->defTrans->action != 0 && + st->defTrans->action->anyCurStateRef() ) + st->bAnyRegCurStateRef = true; + /* A non-empty condition list on any state turns on bAnyConditions. */ + if ( st->stateCondList.length() > 0 ) + bAnyConditions = true; + } + + /* Assign ids to actions that are referenced. */ + assignActionIds(); + + /* Set the maximums of various values used for deciding types. */ + setValueLimits(); + + /* Determine if we should use indicies. 
*/ + calcIndexSize(); +} + /* Size field of the host type that keyOps->typeSubsumes() selects for maxVal. + * Asserts that such a type exists. */ +unsigned int FsmCodeGen::arrayTypeSize( unsigned long maxVal ) +{ + long long maxValLL = (long long) maxVal; + HostType *arrayType = keyOps->typeSubsumes( maxValLL ); + assert( arrayType != 0 ); + return arrayType->size; +} + /* Name of the host type that keyOps->typeSubsumes() selects for maxVal: data1, + * followed by a space and data2 when data2 is set. */ +string FsmCodeGen::ARRAY_TYPE( unsigned long maxVal ) +{ + long long maxValLL = (long long) maxVal; + HostType *arrayType = keyOps->typeSubsumes( maxValLL ); + assert( arrayType != 0 ); + + string ret = arrayType->data1; + if ( arrayType->data2 != 0 ) { + ret += " "; + ret += arrayType->data2; + } + return ret; +} + + +/* Write out the fsm name. */ +string FsmCodeGen::FSM_NAME() +{ + return fsmName; +} + +/* Emit the id of the start state as a decimal integer. */ +string FsmCodeGen::START_STATE_ID() +{ + ostringstream ret; + ret << redFsm->startState->id; + return ret.str(); +}; + +/* Write out the array of actions: a leading 0, then for each action table + * its length followed by the ids of its actions. */ +std::ostream &FsmCodeGen::ACTIONS_ARRAY() +{ + out << "\t0, "; + int totalActions = 1; + for ( ActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { + /* Write out the length, which will never be the last character. */ + out << act->key.length() << ", "; + /* Put in a line break every 8 */ + if ( totalActions++ % 8 == 7 ) + out << "\n\t"; + + for ( ActionTable::Iter item = act->key; item.lte(); item++ ) { + out << item->value->actionId; + if ( ! (act.last() && item.last()) ) + out << ", "; + + /* Put in a line break every 8 */ + if ( totalActions++ % 8 == 7 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + + /* Expression for the current state: the user supplied curStateExpr in + * parentheses, otherwise the access expression followed by cs. */ +string FsmCodeGen::CS() +{ + ostringstream ret; + if ( cgd->curStateExpr != 0 ) { + /* Emit the user supplied method of retrieving the current state. */ + ret << "("; + INLINE_LIST( ret, cgd->curStateExpr, 0, false ); + ret << ")"; + } + else { + /* Expression for retrieving the key, use simple dereference. 
*/ + ret << ACCESS() << "cs"; + } + return ret.str(); +} + /* Text of the user supplied access expression, or an empty string when + * there is none. */ +string FsmCodeGen::ACCESS() +{ + ostringstream ret; + if ( cgd->accessExpr != 0 ) + INLINE_LIST( ret, cgd->accessExpr, 0, false ); + return ret.str(); +} + /* Wide key expression: _widec when the machine has any conditions, otherwise + * the plain key expression from GET_KEY(). */ +string FsmCodeGen::GET_WIDE_KEY() +{ + if ( anyConditions() ) + return "_widec"; + else + return GET_KEY(); +} + /* Per-state variant: _widec only when this state has a non-empty condition + * list, otherwise the plain key expression. */ +string FsmCodeGen::GET_WIDE_KEY( RedStateAp *state ) +{ + if ( state->stateCondList.length() > 0 ) + return "_widec"; + else + return GET_KEY(); +} + /* Expression for the current input key: the user supplied getKeyExpr in + * parentheses, otherwise a dereference of P(). */ +string FsmCodeGen::GET_KEY() +{ + ostringstream ret; + if ( cgd->getKeyExpr != 0 ) { + /* Emit the user supplied method of retrieving the key. */ + ret << "("; + INLINE_LIST( ret, cgd->getKeyExpr, 0, false ); + ret << ")"; + } + else { + /* Expression for retrieving the key, use simple dereference. */ + ret << "(*" << P() << ")"; + } + return ret.str(); +} + +/* Write out level number of tabs. Makes the nested binary search nice + * looking. */ +string FsmCodeGen::TABS( int level ) +{ + string result; + while ( level-- > 0 ) + result += "\t"; + return result; +} + +/* Write out a key from the fsm code gen. Depends on whether or not the key is + * signed: when keys are unsigned and hostLang->explicitUnsigned is set, the + * value is written as an unsigned long with a u suffix. */ +string FsmCodeGen::KEY( Key key ) +{ + ostringstream ret; + if ( keyOps->isSigned || !hostLang->explicitUnsigned ) + ret << key.getVal(); + else + ret << (unsigned long) key.getVal() << 'u'; + return ret.str(); +} + /* Write an fexec: a block assigning the child expression, minus one, to + * P(). */ +void FsmCodeGen::EXEC( ostream &ret, InlineItem *item, int targState, int inFinish ) +{ + /* The parser gives fexec two children. The double brackets are for D + * code. If the inline list is a single word it will get interpreted as a + * C-style cast by the D compiler. */ + ret << "{" << P() << " = (("; + INLINE_LIST( ret, item->children, targState, inFinish ); + ret << "))-1;}"; +} + +void FsmCodeGen::EXECTE( ostream &ret, InlineItem *item, int targState, int inFinish ) +{ + /* Tokend version of exec. */ + + /* The parser gives fexec two children. The double brackets are for D + * code. 
If the inline list is a single word it will get interpreted as a + * C-style cast by the D compiler. */ + ret << "{" << TOKEND() << " = (("; + INLINE_LIST( ret, item->children, targState, inFinish ); + ret << "));}"; +} + + +void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item, + int targState, int inFinish ) +{ + ret << + " switch( act ) {\n"; + + /* If the switch handles error then we also forced the error state. It + * will exist. */ + if ( item->handlesError ) { + ret << " case 0: " << TOKEND() << " = " << TOKSTART() << "; "; + GOTO( ret, redFsm->errState->id, inFinish ); + ret << "\n"; + } + + for ( InlineList::Iter lma = *item->children; lma.lte(); lma++ ) { + /* Write the case label, the action and the case break. */ + ret << " case " << lma->lmId << ":\n"; + + /* Write the block and close it off. */ + ret << " {"; + INLINE_LIST( ret, lma->children, targState, inFinish ); + ret << "}\n"; + + ret << " break;\n"; + } + /* Default required for D code. */ + ret << + " default: break;\n" + " }\n" + "\t"; +} + +void FsmCodeGen::SET_ACT( ostream &ret, InlineItem *item ) +{ + ret << ACT() << " = " << item->lmId << ";"; +} + +void FsmCodeGen::SET_TOKEND( ostream &ret, InlineItem *item ) +{ + /* The tokend action sets tokend. */ + ret << TOKEND() << " = " << P(); + if ( item->offset != 0 ) + out << "+" << item->offset; + out << ";"; +} + +void FsmCodeGen::GET_TOKEND( ostream &ret, InlineItem *item ) +{ + ret << TOKEND(); +} + +void FsmCodeGen::INIT_TOKSTART( ostream &ret, InlineItem *item ) +{ + ret << TOKSTART() << " = " << NULL_ITEM() << ";"; +} + +void FsmCodeGen::INIT_ACT( ostream &ret, InlineItem *item ) +{ + ret << ACT() << " = 0;"; +} + +void FsmCodeGen::SET_TOKSTART( ostream &ret, InlineItem *item ) +{ + ret << TOKSTART() << " = " << P() << ";"; +} + +void FsmCodeGen::SUB_ACTION( ostream &ret, InlineItem *item, + int targState, bool inFinish ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. 
*/ + ret << "{"; + INLINE_LIST( ret, item->children, targState, inFinish ); + ret << "}"; + } +} + + +/* Write out an inline tree structure. Walks the list and possibly calls out + * to virtual functions that handle language specific items in the tree. The + * generated text is appended to ret; targState and inFinish are passed through + * to the handlers that take them. */ +void FsmCodeGen::INLINE_LIST( ostream &ret, InlineList *inlineList, + int targState, bool inFinish ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Text: + ret << item->data; + break; + case InlineItem::Goto: + GOTO( ret, item->targState->id, inFinish ); + break; + case InlineItem::Call: + CALL( ret, item->targState->id, targState, inFinish ); + break; + case InlineItem::Next: + NEXT( ret, item->targState->id, inFinish ); + break; + case InlineItem::Ret: + RET( ret, inFinish ); + break; + case InlineItem::PChar: + ret << P(); + break; + case InlineItem::Char: + ret << GET_KEY(); + break; + case InlineItem::Hold: + ret << P() << "--;"; + break; + case InlineItem::Exec: + EXEC( ret, item, targState, inFinish ); + break; + case InlineItem::HoldTE: + ret << TOKEND() << "--;"; + break; + case InlineItem::ExecTE: + EXECTE( ret, item, targState, inFinish ); + break; + case InlineItem::Curs: + CURS( ret, inFinish ); + break; + case InlineItem::Targs: + TARGS( ret, inFinish, targState ); + break; + case InlineItem::Entry: + ret << item->targState->id; + break; + case InlineItem::GotoExpr: + GOTO_EXPR( ret, item, inFinish ); + break; + case InlineItem::CallExpr: + CALL_EXPR( ret, item, targState, inFinish ); + break; + case InlineItem::NextExpr: + NEXT_EXPR( ret, item, inFinish ); + break; + case InlineItem::LmSwitch: + LM_SWITCH( ret, item, targState, inFinish ); + break; + case InlineItem::LmSetActId: + SET_ACT( ret, item ); + break; + case InlineItem::LmSetTokEnd: + SET_TOKEND( ret, item ); + break; + case InlineItem::LmGetTokEnd: + GET_TOKEND( ret, item ); + break; + case InlineItem::LmInitTokStart: + INIT_TOKSTART( ret, item ); + break; + case 
InlineItem::LmInitAct: + INIT_ACT( ret, item ); + break; + case InlineItem::LmSetTokStart: + SET_TOKSTART( ret, item ); + break; + case InlineItem::SubAction: + SUB_ACTION( ret, item, targState, inFinish ); + break; + case InlineItem::Break: + BREAK( ret, targState ); + break; + } + } +} +/* Write out paths in line directives. Escapes any special characters. */ +string FsmCodeGen::LDIR_PATH( char *path ) +{ + ostringstream ret; + for ( char *pc = path; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + ret << "\\\\"; + else + ret << *pc; + } + return ret.str(); +} + +void FsmCodeGen::ACTION( ostream &ret, Action *action, int targState, bool inFinish ) +{ + /* Write the preprocessor line info for going into the source file. */ + lineDirective( ret, cgd->fileName, action->loc.line ); + + /* Write the block and close it off. */ + ret << "\t{"; + INLINE_LIST( ret, action->inlineList, targState, inFinish ); + ret << "}\n"; +} + +void FsmCodeGen::CONDITION( ostream &ret, Action *condition ) +{ + ret << "\n"; + lineDirective( ret, cgd->fileName, condition->loc.line ); + INLINE_LIST( ret, condition->inlineList, 0, false ); +} + +string FsmCodeGen::ERROR_STATE() +{ + ostringstream ret; + if ( redFsm->errState != 0 ) + ret << redFsm->errState->id; + else + ret << "-1"; + return ret.str(); +} + +string FsmCodeGen::FIRST_FINAL_STATE() +{ + ostringstream ret; + if ( redFsm->firstFinState != 0 ) + ret << redFsm->firstFinState->id; + else + ret << redFsm->nextStateId; + return ret.str(); +} + +void FsmCodeGen::writeOutInit() +{ + out << " {\n"; + out << "\t" << CS() << " = " << START() << ";\n"; + + /* If there are any calls, then the stack top needs initialization. 
*/ + if ( anyActionCalls() || anyActionRets() ) + out << "\t" << TOP() << " = 0;\n"; + /* Longest-match machines also get tokstart, tokend and act initialized. */ + if ( cgd->hasLongestMatch ) { + out << + " " << TOKSTART() << " = " << NULL_ITEM() << ";\n" + " " << TOKEND() << " = " << NULL_ITEM() << ";\n" + " " << ACT() << " = 0;\n"; + } + out << " }\n"; +} + /* Prefix for generated data names: the fsm name plus an underscore when + * cgd->dataPrefix is set, otherwise an empty string. */ +string FsmCodeGen::DATA_PREFIX() +{ + if ( cgd->dataPrefix ) + return FSM_NAME() + "_"; + return ""; +} + +/* Emit the alphabet data type: data1, followed by a space and data2 when + * data2 is set. */ +string FsmCodeGen::ALPH_TYPE() +{ + string ret = keyOps->alphType->data1; + if ( keyOps->alphType->data2 != 0 ) { + ret += " "; + ret += + keyOps->alphType->data2; + } + return ret; +} + +/* Emit the wide alphabet data type: the alphabet type itself when maxKey fits + * within keyOps->maxKey, otherwise the host type that subsumes maxKey. */ +string FsmCodeGen::WIDE_ALPH_TYPE() +{ + string ret; + if ( maxKey <= keyOps->maxKey ) + ret = ALPH_TYPE(); + else { + long long maxKeyVal = maxKey.getLongLong(); + HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal ); + assert( wideType != 0 ); + + ret = wideType->data1; + if ( wideType->data2 != 0 ) { + ret += " "; + ret += wideType->data2; + } + } + return ret; +} + + +/* + * Language specific, but style independent code generators functions. 
+ */ + +string CCodeGen::PTR_CONST() +{ + return "const "; +} + +std::ostream &CCodeGen::OPEN_ARRAY( string type, string name ) +{ + out << "static const " << type << " " << name << "[] = {\n"; + return out; +} + +std::ostream &CCodeGen::CLOSE_ARRAY() +{ + return out << "};\n"; +} + +std::ostream &CCodeGen::STATIC_VAR( string type, string name ) +{ + out << "static const " << type << " " << name; + return out; +} + +string CCodeGen::UINT( ) +{ + return "unsigned int"; +} + +string CCodeGen::ARR_OFF( string ptr, string offset ) +{ + return ptr + " + " + offset; +} + +string CCodeGen::CAST( string type ) +{ + return "(" + type + ")"; +} + +string CCodeGen::NULL_ITEM() +{ + return "0"; +} + +string CCodeGen::POINTER() +{ + return " *"; +} + +std::ostream &CCodeGen::SWITCH_DEFAULT() +{ + return out; +} + +string CCodeGen::CTRL_FLOW() +{ + return ""; +} + +/* + * D Specific + */ + +string DCodeGen::NULL_ITEM() +{ + return "null"; +} + +string DCodeGen::POINTER() +{ + // multiple items seperated by commas can also be pointer types. + return "* "; +} + +string DCodeGen::PTR_CONST() +{ + return ""; +} + +std::ostream &DCodeGen::OPEN_ARRAY( string type, string name ) +{ + out << "static const " << type << "[] " << name << " = [\n"; + return out; +} + +std::ostream &DCodeGen::CLOSE_ARRAY() +{ + return out << "];\n"; +} + +std::ostream &DCodeGen::STATIC_VAR( string type, string name ) +{ + out << "static const " << type << " " << name; + return out; +} + +string DCodeGen::ARR_OFF( string ptr, string offset ) +{ + return "&" + ptr + "[" + offset + "]"; +} + +string DCodeGen::CAST( string type ) +{ + return "cast(" + type + ")"; +} + +string DCodeGen::UINT( ) +{ + return "uint"; +} + +std::ostream &DCodeGen::SWITCH_DEFAULT() +{ + out << " default: break;\n"; + return out; +} + +string DCodeGen::CTRL_FLOW() +{ + return "if (true) "; +} + + +/* + * Java Specific + */ + +string JavaCodeGen::PTR_CONST() +{ + /* Not used in Java code. 
*/ + assert( false ); + return "final"; +} + +std::ostream &JavaCodeGen::OPEN_ARRAY( string type, string name ) +{ + out << "static final " << type << "[] " << name << " = {\n"; + return out; +} + +std::ostream &JavaCodeGen::CLOSE_ARRAY() +{ + return out << "};\n"; +} + +std::ostream &JavaCodeGen::STATIC_VAR( string type, string name ) +{ + out << "static final " << type << " " << name; + return out; +} + +string JavaCodeGen::UINT( ) +{ + /* Not used. */ + assert( false ); + return "long"; +} + +string JavaCodeGen::ARR_OFF( string ptr, string offset ) +{ + return ptr + " + " + offset; +} + +string JavaCodeGen::CAST( string type ) +{ + return "(" + type + ")"; +} + +string JavaCodeGen::NULL_ITEM() +{ + /* In java we use integers instead of pointers. */ + return "-1"; +} + +string JavaCodeGen::POINTER() +{ + /* Not used. */ + assert( false ); + return " *"; +} + +std::ostream &JavaCodeGen::SWITCH_DEFAULT() +{ + return out; +} + +string JavaCodeGen::GET_KEY() +{ + ostringstream ret; + if ( cgd->getKeyExpr != 0 ) { + /* Emit the user supplied method of retrieving the key. */ + ret << "("; + INLINE_LIST( ret, cgd->getKeyExpr, 0, false ); + ret << ")"; + } + else { + /* Expression for retrieving the key, use simple dereference. */ + ret << "data[" << P() << "]"; + } + return ret.str(); +} + +string JavaCodeGen::CTRL_FLOW() +{ + return "if (true) "; +} + diff --git a/rlcodegen/fsmcodegen.h b/rlcodegen/fsmcodegen.h new file mode 100644 index 0000000..777c6fd --- /dev/null +++ b/rlcodegen/fsmcodegen.h @@ -0,0 +1,297 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Eric Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _FSMCODEGEN_H +#define _FSMCODEGEN_H + +#include <iostream> +#include <string> +#include <stdio.h> +#include "common.h" + +using std::string; +using std::ostream; + +/* Integer array line length. */ +#define IALL 8 + +/* Forwards. */ +struct RedFsmAp; +struct RedStateAp; +struct CodeGenData; +struct Action; +struct NameInst; +struct InlineItem; +struct InlineList; +struct RedAction; +struct LongestMatch; +struct LongestMatchPart; + +inline string itoa( int i ) +{ + char buf[16]; + sprintf( buf, "%i", i ); + return buf; +} + +/* + * class FsmCodeGen + */ +class FsmCodeGen +{ +public: + FsmCodeGen(); + virtual ~FsmCodeGen() {} + + virtual void writeOutData() = 0; + virtual void writeOutInit(); + virtual void writeOutExec() = 0; + virtual void writeOutEOF() = 0; + + /* Gather various info on the machine. 
*/ + void analyzeAction( Action *act, InlineList *inlineList ); + void analyzeActionList( RedAction *redAct, InlineList *inlineList ); + void analyzeMachine(); + +protected: + friend struct CodeGenData; + + string FSM_NAME(); + string START_STATE_ID(); + ostream &ACTIONS_ARRAY(); + string GET_WIDE_KEY(); + string GET_WIDE_KEY( RedStateAp *state ); + string TABS( int level ); + string KEY( Key key ); + string LDIR_PATH( char *path ); + void ACTION( ostream &ret, Action *action, int targState, bool inFinish ); + void CONDITION( ostream &ret, Action *condition ); + string ALPH_TYPE(); + string WIDE_ALPH_TYPE(); + string ARRAY_TYPE( unsigned long maxVal ); + + virtual string ARR_OFF( string ptr, string offset ) = 0; + virtual string CAST( string type ) = 0; + virtual string UINT() = 0; + virtual string NULL_ITEM() = 0; + virtual string POINTER() = 0; + virtual string GET_KEY(); + virtual ostream &SWITCH_DEFAULT() = 0; + + string P() { return "p"; } + string PE() { return "pe"; } + + string ACCESS(); + string CS(); + string STACK() { return ACCESS() + "stack"; } + string TOP() { return ACCESS() + "top"; } + string TOKSTART() { return ACCESS() + "tokstart"; } + string TOKEND() { return ACCESS() + "tokend"; } + string ACT() { return ACCESS() + "act"; } + + string DATA_PREFIX(); + string PM() { return "_" + DATA_PREFIX() + "partition_map"; } + string C() { return "_" + DATA_PREFIX() + "cond_spaces"; } + string CK() { return "_" + DATA_PREFIX() + "cond_keys"; } + string K() { return "_" + DATA_PREFIX() + "trans_keys"; } + string I() { return "_" + DATA_PREFIX() + "indicies"; } + string CO() { return "_" + DATA_PREFIX() + "cond_offsets"; } + string KO() { return "_" + DATA_PREFIX() + "key_offsets"; } + string IO() { return "_" + DATA_PREFIX() + "index_offsets"; } + string CL() { return "_" + DATA_PREFIX() + "cond_lengths"; } + string SL() { return "_" + DATA_PREFIX() + "single_lengths"; } + string RL() { return "_" + DATA_PREFIX() + "range_lengths"; } + string A() { return 
"_" + DATA_PREFIX() + "actions"; } + string TA() { return "_" + DATA_PREFIX() + "trans_actions_wi"; } + string TT() { return "_" + DATA_PREFIX() + "trans_targs_wi"; } + string TSA() { return "_" + DATA_PREFIX() + "to_state_actions"; } + string FSA() { return "_" + DATA_PREFIX() + "from_state_actions"; } + string EA() { return "_" + DATA_PREFIX() + "eof_actions"; } + string SP() { return "_" + DATA_PREFIX() + "key_spans"; } + string CSP() { return "_" + DATA_PREFIX() + "cond_key_spans"; } + string START() { return DATA_PREFIX() + "start"; } + string ERROR() { return DATA_PREFIX() + "error"; } + string FIRST_FINAL() { return DATA_PREFIX() + "first_final"; } + string CTXDATA() { return DATA_PREFIX() + "ctxdata"; } + + void INLINE_LIST( ostream &ret, InlineList *inlineList, int targState, bool inFinish ); + virtual void GOTO( ostream &ret, int gotoDest, bool inFinish ) = 0; + virtual void CALL( ostream &ret, int callDest, int targState, bool inFinish ) = 0; + virtual void NEXT( ostream &ret, int nextDest, bool inFinish ) = 0; + virtual void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ) = 0; + virtual void NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ) = 0; + virtual void CALL_EXPR( ostream &ret, InlineItem *ilItem, + int targState, bool inFinish ) = 0; + virtual void RET( ostream &ret, bool inFinish ) = 0; + virtual void BREAK( ostream &ret, int targState ) = 0; + virtual void CURS( ostream &ret, bool inFinish ) = 0; + virtual void TARGS( ostream &ret, bool inFinish, int targState ) = 0; + void EXEC( ostream &ret, InlineItem *item, int targState, int inFinish ); + void EXECTE( ostream &ret, InlineItem *item, int targState, int inFinish ); + void LM_SWITCH( ostream &ret, InlineItem *item, int targState, int inFinish ); + void SET_ACT( ostream &ret, InlineItem *item ); + void INIT_TOKSTART( ostream &ret, InlineItem *item ); + void INIT_ACT( ostream &ret, InlineItem *item ); + void SET_TOKSTART( ostream &ret, InlineItem *item ); + void 
SET_TOKEND( ostream &ret, InlineItem *item ); + void GET_TOKEND( ostream &ret, InlineItem *item ); + void SUB_ACTION( ostream &ret, InlineItem *item, + int targState, bool inFinish ); + + string ERROR_STATE(); + string FIRST_FINAL_STATE(); + + virtual string PTR_CONST() = 0; + virtual ostream &OPEN_ARRAY( string type, string name ) = 0; + virtual ostream &CLOSE_ARRAY() = 0; + virtual ostream &STATIC_VAR( string type, string name ) = 0; + + virtual string CTRL_FLOW() = 0; + + unsigned int arrayTypeSize( unsigned long maxVal ); + + bool anyActions(); + bool anyToStateActions() { return bAnyToStateActions; } + bool anyFromStateActions() { return bAnyFromStateActions; } + bool anyRegActions() { return bAnyRegActions; } + bool anyEofActions() { return bAnyEofActions; } + bool anyActionGotos() { return bAnyActionGotos; } + bool anyActionCalls() { return bAnyActionCalls; } + bool anyActionRets() { return bAnyActionRets; } + bool anyRegActionRets() { return bAnyRegActionRets; } + bool anyRegActionByValControl() { return bAnyRegActionByValControl; } + bool anyRegNextStmt() { return bAnyRegNextStmt; } + bool anyRegCurStateRef() { return bAnyRegCurStateRef; } + bool anyRegBreak() { return bAnyRegBreak; } + bool anyLmSwitchError() { return bAnyLmSwitchError; } + bool anyConditions() { return bAnyConditions; } + + /* Set up labelNeeded flag for each state. Differs for each goto style so + * is virtual. */ + virtual void setLabelsNeeded() {} + + /* Determine if we should use indicies. */ + virtual void calcIndexSize() {} + + void findFinalActionRefs(); + void assignActionIds(); + void setValueLimits(); + + /* Are there any regular transition functions, any out transition functions. 
*/ + char *fsmName; + CodeGenData *cgd; + RedFsmAp *redFsm; + + bool outLabelUsed; + bool againLabelUsed; + +protected: + ostream &out; + + bool bAnyToStateActions; + bool bAnyFromStateActions; + bool bAnyRegActions; + bool bAnyEofActions; + bool bAnyActionGotos; + bool bAnyActionCalls; + bool bAnyActionRets; + bool bAnyRegActionRets; + bool bAnyRegActionByValControl; + bool bAnyRegNextStmt; + bool bAnyRegCurStateRef; + bool bAnyRegBreak; + bool bAnyLmSwitchError; + bool bAnyConditions; + + int maxState; + int maxSingleLen; + int maxRangeLen; + int maxKeyOffset; + int maxIndexOffset; + int maxIndex; + int maxActListId; + int maxActionLoc; + int maxActArrItem; + unsigned long long maxSpan; + unsigned long long maxCondSpan; + int maxFlatIndexOffset; + Key maxKey; + int maxCondOffset; + int maxCondLen; + int maxCondSpaceId; + int maxCondIndexOffset; + int maxCond; + + bool useIndicies; +}; + +class CCodeGen : virtual public FsmCodeGen +{ +public: + virtual string NULL_ITEM(); + virtual string POINTER(); + virtual ostream &SWITCH_DEFAULT(); + virtual ostream &OPEN_ARRAY( string type, string name ); + virtual ostream &CLOSE_ARRAY(); + virtual ostream &STATIC_VAR( string type, string name ); + virtual string ARR_OFF( string ptr, string offset ); + virtual string CAST( string type ); + virtual string UINT(); + virtual string PTR_CONST(); + virtual string CTRL_FLOW(); +}; + +class DCodeGen : virtual public FsmCodeGen +{ +public: + virtual string NULL_ITEM(); + virtual string POINTER(); + virtual ostream &SWITCH_DEFAULT(); + virtual ostream &OPEN_ARRAY( string type, string name ); + virtual ostream &CLOSE_ARRAY(); + virtual ostream &STATIC_VAR( string type, string name ); + virtual string ARR_OFF( string ptr, string offset ); + virtual string CAST( string type ); + virtual string UINT(); + virtual string PTR_CONST(); + virtual string CTRL_FLOW(); +}; + +class JavaCodeGen : virtual public FsmCodeGen +{ +public: + virtual string NULL_ITEM(); + virtual string POINTER(); + 
virtual ostream &SWITCH_DEFAULT(); + virtual ostream &OPEN_ARRAY( string type, string name ); + virtual ostream &CLOSE_ARRAY(); + virtual ostream &STATIC_VAR( string type, string name ); + virtual string ARR_OFF( string ptr, string offset ); + virtual string CAST( string type ); + virtual string UINT(); + virtual string PTR_CONST(); + virtual string GET_KEY(); + virtual string CTRL_FLOW(); +}; + +#endif /* _FSMCODEGEN_H */ diff --git a/rlcodegen/ftabcodegen.cpp b/rlcodegen/ftabcodegen.cpp new file mode 100644 index 0000000..2aba78b --- /dev/null +++ b/rlcodegen/ftabcodegen.cpp @@ -0,0 +1,418 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Eric Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "rlcodegen.h" +#include "ftabcodegen.h" +#include "redfsm.h" +#include "gendata.h" + +/* Determine if we should use indicies or not. */ +void FTabCodeGen::calcIndexSize() +{ + int sizeWithInds = 0, sizeWithoutInds = 0; + + /* Calculate cost of using with indicies. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + int totalIndex = st->outSingle.length() + st->outRange.length() + + (st->defTrans == 0 ? 
0 : 1); + sizeWithInds += arrayTypeSize(maxIndex) * totalIndex; + } + sizeWithInds += arrayTypeSize(maxState) * redFsm->transSet.length(); + if ( anyActions() ) + sizeWithInds += arrayTypeSize(maxActListId) * redFsm->transSet.length(); + + /* Calculate the cost of not using indicies. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + int totalIndex = st->outSingle.length() + st->outRange.length() + + (st->defTrans == 0 ? 0 : 1); + sizeWithoutInds += arrayTypeSize(maxState) * totalIndex; + if ( anyActions() ) + sizeWithoutInds += arrayTypeSize(maxActListId) * totalIndex; + } + + /* If using indicies reduces the size, use them. */ + useIndicies = sizeWithInds < sizeWithoutInds; +} + +std::ostream &FTabCodeGen::TO_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->actListId+1; + out << act; + return out; +} + +std::ostream &FTabCodeGen::FROM_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->actListId+1; + out << act; + return out; +} + +std::ostream &FTabCodeGen::EOF_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->actListId+1; + out << act; + return out; +} + + +/* Write out the function for a transition. */ +std::ostream &FTabCodeGen::TRANS_ACTION( RedTransAp *trans ) +{ + int action = 0; + if ( trans->action != 0 ) + action = trans->action->actListId+1; + out << action; + return out; +} + +/* Write out the function switch. This switch is keyed on the values + * of the func index. */ +std::ostream &FTabCodeGen::TO_STATE_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numToStateRefs > 0 ) { + /* Write the entry label. */ + out << "\tcase " << redAct->actListId+1 << ":\n"; + + /* Write each action in the list of action items. 
*/ + for ( ActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, 0, false ); + + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +/* Write out the function switch. This switch is keyed on the values + * of the func index. */ +std::ostream &FTabCodeGen::FROM_STATE_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numFromStateRefs > 0 ) { + /* Write the entry label. */ + out << "\tcase " << redAct->actListId+1 << ":\n"; + + /* Write each action in the list of action items. */ + for ( ActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, 0, false ); + + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +std::ostream &FTabCodeGen::EOF_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numEofRefs > 0 ) { + /* Write the entry label. */ + out << "\tcase " << redAct->actListId+1 << ":\n"; + + /* Write each action in the list of action items. */ + for ( ActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, 0, true ); + + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +/* Write out the function switch. This switch is keyed on the values + * of the func index. */ +std::ostream &FTabCodeGen::ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numTransRefs > 0 ) { + /* Write the entry label. */ + out << "\tcase " << redAct->actListId+1 << ":\n"; + + /* Write each action in the list of action items. 
*/ + for ( ActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, 0, false ); + + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +void FTabCodeGen::writeOutData() +{ + if ( anyConditions() ) { + OPEN_ARRAY( ARRAY_TYPE(maxCondOffset), CO() ); + COND_OFFSETS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(maxCondLen), CL() ); + COND_LENS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( WIDE_ALPH_TYPE(), CK() ); + COND_KEYS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(maxCondSpaceId), C() ); + COND_SPACES(); + CLOSE_ARRAY() << + "\n"; + } + + OPEN_ARRAY( ARRAY_TYPE(maxKeyOffset), KO() ); + KEY_OFFSETS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( WIDE_ALPH_TYPE(), K() ); + KEYS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(maxSingleLen), SL() ); + SINGLE_LENS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(maxRangeLen), RL() ); + RANGE_LENS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(maxIndexOffset), IO() ); + INDEX_OFFSETS(); + CLOSE_ARRAY() << + "\n"; + + if ( useIndicies ) { + OPEN_ARRAY( ARRAY_TYPE(maxIndex), I() ); + INDICIES(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(maxState), TT() ); + TRANS_TARGS_WI(); + CLOSE_ARRAY() << + "\n"; + + if ( anyActions() ) { + OPEN_ARRAY( ARRAY_TYPE(maxActListId), TA() ); + TRANS_ACTIONS_WI(); + CLOSE_ARRAY() << + "\n"; + } + } + else { + OPEN_ARRAY( ARRAY_TYPE(maxState), TT() ); + TRANS_TARGS(); + CLOSE_ARRAY() << + "\n"; + + if ( anyActions() ) { + OPEN_ARRAY( ARRAY_TYPE(maxActListId), TA() ); + TRANS_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + } + + if ( anyToStateActions() ) { + OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), TSA() ); + TO_STATE_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + if ( anyFromStateActions() ) { + OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), FSA() ); + FROM_STATE_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + if ( anyEofActions() ) { + OPEN_ARRAY( ARRAY_TYPE(maxActListId), EA() ); + 
EOF_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + out << + "static const int " << START() << " = " << START_STATE_ID() << ";\n" + "\n"; + + if ( cgd->writeFirstFinal ) { + out << + "static const int " << FIRST_FINAL() << " = " << FIRST_FINAL_STATE() << ";\n" + "\n"; + } + + if ( cgd->writeErr ) { + out << + "static const int " << ERROR() << " = " << ERROR_STATE() << ";\n" + "\n"; + } +} + +void FTabCodeGen::writeOutExec() +{ + outLabelUsed = false; + + out << + " {\n" + " int _klen"; + + if ( anyRegCurStateRef() ) + out << ", _ps"; + + out << + ";\n" + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_keys;\n" + " int _trans;\n"; + + if ( anyConditions() ) + out << " " << WIDE_ALPH_TYPE() << " _widec;\n"; + + out << "\n"; + + if ( cgd->hasEnd ) { + outLabelUsed = true; + out << + " if ( " << P() << " == " << PE() << " )\n" + " goto _out;\n"; + } + + out << "_resume:\n"; + + if ( redFsm->errState != 0 ) { + outLabelUsed = true; + out << + " if ( " << CS() << " == " << redFsm->errState->id << " )\n" + " goto _out;\n"; + } + + if ( anyFromStateActions() ) { + out << + " switch ( " << FSA() << "[" << CS() << "] ) {\n"; + FROM_STATE_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + "\n"; + } + + if ( anyConditions() ) + COND_TRANSLATE(); + + LOCATE_TRANS(); + + out << "_match:\n"; + + if ( anyRegCurStateRef() ) + out << " _ps = " << CS() << ";\n"; + + if ( useIndicies ) + out << " _trans = " << I() << "[_trans];\n"; + + out << + " " << CS() << " = " << TT() << "[_trans];\n" + "\n"; + + if ( anyRegActions() ) { + out << + " if ( " << TA() << "[_trans] == 0 )\n" + " goto _again;\n" + "\n" + " switch ( " << TA() << "[_trans] ) {\n"; + ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + "\n"; + } + + if ( anyRegActions() || anyActionGotos() || anyActionCalls() || anyActionRets() ) + out << "_again:\n"; + + if ( anyToStateActions() ) { + out << + " switch ( " << TSA() << "[" << CS() << "] ) {\n"; + TO_STATE_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + "\n"; + } + + 
if ( cgd->hasEnd ) { + out << + " if ( ++" << P() << " != " << PE() << " )\n" + " goto _resume;\n"; + } + else { + out << + " " << P() << " += 1;\n" + " goto _resume;\n"; + } + + + if ( outLabelUsed ) + out << " _out: {}\n"; + + out << " }\n"; +} + + +void FTabCodeGen::writeOutEOF() +{ + if ( anyEofActions() ) { + out << + " {\n" + " switch ( " << EA() << "[" << CS() << "] ) {\n"; + EOF_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + "\n"; + } +} diff --git a/rlcodegen/ftabcodegen.h b/rlcodegen/ftabcodegen.h new file mode 100644 index 0000000..20e4663 --- /dev/null +++ b/rlcodegen/ftabcodegen.h @@ -0,0 +1,72 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Eric Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _FTABCODEGEN_H +#define _FTABCODEGEN_H + +#include <iostream> +#include "tabcodegen.h" + +/* Forwards. 
*/ +struct CodeGenData; + + +/* + * FTabCodeGen + */ +class FTabCodeGen : public TabCodeGen +{ +protected: + std::ostream &TO_STATE_ACTION_SWITCH(); + std::ostream &FROM_STATE_ACTION_SWITCH(); + std::ostream &EOF_ACTION_SWITCH(); + std::ostream &ACTION_SWITCH(); + + virtual std::ostream &TO_STATE_ACTION( RedStateAp *state ); + virtual std::ostream &FROM_STATE_ACTION( RedStateAp *state ); + virtual std::ostream &EOF_ACTION( RedStateAp *state ); + virtual std::ostream &TRANS_ACTION( RedTransAp *trans ); + virtual void calcIndexSize(); + virtual void writeOutData(); + virtual void writeOutEOF(); + virtual void writeOutExec(); +}; + + +/* + * CFTabCodeGen + */ +struct CFTabCodeGen + : public FTabCodeGen, public CCodeGen +{ +}; + +/* + * class DFTabCodeGen + */ +struct DFTabCodeGen + : public FTabCodeGen, public DCodeGen +{ +}; + +#endif /* _FTABCODEGEN_H */ diff --git a/rlcodegen/gendata.cpp b/rlcodegen/gendata.cpp new file mode 100644 index 0000000..419e903 --- /dev/null +++ b/rlcodegen/gendata.cpp @@ -0,0 +1,563 @@ +/* + * Copyright 2005-2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "gendata.h" + +/* Code Generators. 
*/ +#include "gvdotgen.h" +#include "tabcodegen.h" +#include "ftabcodegen.h" +#include "flatcodegen.h" +#include "fflatcodegen.h" +#include "gotocodegen.h" +#include "fgotocodegen.h" +#include "ipgotocodegen.h" +#include "splitcodegen.h" +#include "javacodegen.h" + +#include <iostream> + +using std::cerr; +using std::endl; + +CodeGenData *cgd = 0; + +void CodeGenData::createMachine() +{ + redFsm = new RedFsmAp(); +} + +void CodeGenData::initActionList( unsigned long length ) +{ + allActions = new Action[length]; + for ( unsigned long a = 0; a < length; a++ ) + actionList.append( allActions+a ); +} + +void CodeGenData::newAction( int anum, char *name, int line, + int col, InlineList *inlineList ) +{ + allActions[anum].actionId = anum; + allActions[anum].name = name; + allActions[anum].loc.line = line; + allActions[anum].loc.col = col; + allActions[anum].inlineList = inlineList; +} + +void CodeGenData::initActionTableList( unsigned long length ) +{ + allActionTables = new RedAction[length]; +} + +void CodeGenData::initStateList( unsigned long length ) +{ + allStates = new RedStateAp[length]; + for ( unsigned long s = 0; s < length; s++ ) + redFsm->stateList.append( allStates+s ); +} + +void CodeGenData::setStartState( unsigned long startState ) +{ + this->startState = startState; +} + +void CodeGenData::addEntryPoint( char *name, unsigned long entryState ) +{ + entryPointIds.append( entryState ); + entryPointNames.append( name ); +} + +void CodeGenData::initTransList( int snum, unsigned long length ) +{ + /* Could preallocate the out range to save time growing it. For now do + * nothing. */ +} + +void CodeGenData::newTrans( int snum, int tnum, Key lowKey, + Key highKey, long targ, long action ) +{ + /* Get the current state and range. */ + RedStateAp *curState = allStates + snum; + RedTransList &destRange = curState->outRange; + + /* Make the new transitions. */ + RedStateAp *targState = targ >= 0 ? (allStates + targ) : + wantComplete ? 
redFsm->getErrorState() : 0; + RedAction *actionTable = action >= 0 ? (allActionTables + action) : 0; + RedTransAp *trans = redFsm->allocateTrans( targState, actionTable ); + RedTransEl transEl( lowKey, highKey, trans ); + + if ( wantComplete ) { + /* If the machine is to be complete then we need to fill any gaps with + * the error transitions. */ + if ( destRange.length() == 0 ) { + /* Range is currently empty. */ + if ( keyOps->minKey < lowKey ) { + /* The first range doesn't start at the low end. */ + Key fillHighKey = lowKey; + fillHighKey.decrement(); + + /* Create the filler with the state's error transition. */ + RedTransEl newTel( keyOps->minKey, fillHighKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + } + else { + /* The range list is not empty, get the the last range. */ + RedTransEl *last = &destRange[destRange.length()-1]; + Key nextKey = last->highKey; + nextKey.increment(); + if ( nextKey < lowKey ) { + /* There is a gap to fill. Make the high key. */ + Key fillHighKey = lowKey; + fillHighKey.decrement(); + + /* Create the filler with the state's error transtion. */ + RedTransEl newTel( nextKey, fillHighKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + } + } + + /* Filler taken care of. Append the range. */ + destRange.append( RedTransEl( lowKey, highKey, trans ) ); +} + +void CodeGenData::finishTransList( int snum ) +{ + /* Get the current state and range. */ + RedStateAp *curState = allStates + snum; + RedTransList &destRange = curState->outRange; + + /* If building a complete machine we may need filler on the end. */ + if ( wantComplete ) { + /* Check if there are any ranges already. */ + if ( destRange.length() == 0 ) { + /* Fill with the whole alphabet. */ + /* Add the range on the lower and upper bound. */ + RedTransEl newTel( keyOps->minKey, keyOps->maxKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + else { + /* Get the last and check for a gap on the end. 
*/ + RedTransEl *last = &destRange[destRange.length()-1]; + if ( last->highKey < keyOps->maxKey ) { + /* Make the high key. */ + Key fillLowKey = last->highKey; + fillLowKey.increment(); + + /* Create the new range with the error trans and append it. */ + RedTransEl newTel( fillLowKey, keyOps->maxKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + } + } +} + +void CodeGenData::setFinal( int snum ) +{ + RedStateAp *curState = allStates + snum; + curState->isFinal = true; +} + + +void CodeGenData::setStateActions( int snum, long toStateAction, + long fromStateAction, long eofAction ) +{ + RedStateAp *curState = allStates + snum; + if ( toStateAction >= 0 ) + curState->toStateAction = allActionTables + toStateAction; + if ( fromStateAction >= 0 ) + curState->fromStateAction = allActionTables + fromStateAction; + if ( eofAction >= 0 ) + curState->eofAction = allActionTables + eofAction; +} + +void CodeGenData::resolveTargetStates( InlineList *inlineList ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Goto: case InlineItem::Call: + case InlineItem::Next: case InlineItem::Entry: + item->targState = allStates + item->targId; + break; + default: + break; + } + + if ( item->children != 0 ) + resolveTargetStates( item->children ); + } +} + + +void CodeGenData::finishMachine() +{ + if ( redFsm->forcedErrorState ) + redFsm->getErrorState(); + + /* We get the start state as an offset, set the pointer now. */ + redFsm->startState = allStates + startState; + for ( EntryIdVect::Iter en = entryPointIds; en.lte(); en++ ) + redFsm->entryPoints.insert( allStates + *en ); + + for ( ActionList::Iter a = actionList; a.lte(); a++ ) + resolveTargetStates( a->inlineList ); + + /* Note that even if we want a complete graph we do not give the error + * state a default transition. All machines break out of the processing + * loop when in the error state. 
*/ + + if ( codeStyle == GenGoto || codeStyle == GenFGoto || codeStyle == GenIpGoto ) { + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + for ( StateCondList::Iter sci = st->stateCondList; sci.lte(); sci++ ) + st->stateCondVect.append( sci ); + } + } +} + + +bool CodeGenData::setAlphType( char *data ) +{ + /* FIXME: This should validate the alphabet type selection. */ + HostType *alphType = hostLang->hostTypes + atoi(data); + thisKeyOps.setAlphType( alphType ); + return true; +} + +void CodeGenData::initCondSpaceList( ulong length ) +{ + allCondSpaces = new CondSpace[length]; + for ( ulong c = 0; c < length; c++ ) + condSpaceList.append( allCondSpaces + c ); +} + +void CodeGenData::newCondSpace( int cnum, int condSpaceId, Key baseKey ) +{ + CondSpace *cond = allCondSpaces + cnum; + cond->condSpaceId = condSpaceId; + cond->baseKey = baseKey; +} + +void CodeGenData::condSpaceItem( int cnum, long condActionId ) +{ + CondSpace *cond = allCondSpaces + cnum; + cond->condSet.append( allActions + condActionId ); +} + +void CodeGenData::initStateCondList( int snum, ulong length ) +{ + /* Could preallocate these, as we could with transitions. */ +} + +void CodeGenData::addStateCond( int snum, Key lowKey, Key highKey, long condNum ) +{ + RedStateAp *curState = allStates + snum; + + /* Create the new state condition. */ + StateCond *stateCond = new StateCond; + stateCond->lowKey = lowKey; + stateCond->highKey = highKey; + + /* Assign it a cond space. */ + CondSpace *condSpace = allCondSpaces + condNum; + stateCond->condSpace = condSpace; + + curState->stateCondList.append( stateCond ); +} + + +/* Generate the codegen depending on the command line options given. 
*/ +void CodeGenData::makeCodeGen() +{ + switch ( hostLangType ) { + case CCode: + switch ( codeStyle ) { + case GenTables: + codeGen = new CTabCodeGen; + break; + case GenFTables: + codeGen = new CFTabCodeGen; + break; + case GenFlat: + codeGen = new CFlatCodeGen; + break; + case GenFFlat: + codeGen = new CFFlatCodeGen; + break; + case GenGoto: + codeGen = new CGotoCodeGen; + break; + case GenFGoto: + codeGen = new CFGotoCodeGen; + break; + case GenIpGoto: + codeGen = new CIpGotoCodeGen; + break; + case GenSplit: + codeGen = new CSplitCodeGen; + break; + } + break; + + case DCode: + switch ( codeStyle ) { + case GenTables: + codeGen = new DTabCodeGen; + break; + case GenFTables: + codeGen = new DFTabCodeGen; + break; + case GenFlat: + codeGen = new DFlatCodeGen; + break; + case GenFFlat: + codeGen = new DFFlatCodeGen; + break; + case GenGoto: + codeGen = new DGotoCodeGen; + break; + case GenFGoto: + codeGen = new DFGotoCodeGen; + break; + case GenIpGoto: + codeGen = new DIpGotoCodeGen; + break; + case GenSplit: + codeGen = new DSplitCodeGen; + break; + } + break; + + case JavaCode: + switch ( codeStyle ) { + case GenTables: + codeGen = new JavaTabCodeGen; + break; + default: + assert(false); + break; + } + break; + } + + codeGen->fsmName = fsmName; + codeGen->cgd = this; +} + +CondSpace *CodeGenData::findCondSpace( Key lowKey, Key highKey ) +{ + for ( CondSpaceList::Iter cs = condSpaceList; cs.lte(); cs++ ) { + Key csHighKey = cs->baseKey; + csHighKey += keyOps->alphSize() * (1 << cs->condSet.length()); + + if ( lowKey >= cs->baseKey && highKey <= csHighKey ) + return cs; + } + return 0; +} + +Condition *CodeGenData::findCondition( Key key ) +{ + for ( ConditionList::Iter cond = conditionList; cond.lte(); cond++ ) { + Key upperKey = cond->baseKey + (1 << cond->condSet.length()); + if ( cond->baseKey <= key && key <= upperKey ) + return cond; + } + return 0; +} + +Key CodeGenData::findMaxKey() +{ + Key maxKey = keyOps->maxKey; + for ( RedStateList::Iter st = 
redFsm->stateList; st.lte(); st++ ) { + assert( st->outSingle.length() == 0 ); + assert( st->defTrans == 0 ); + + long rangeLen = st->outRange.length(); + if ( rangeLen > 0 ) { + Key highKey = st->outRange[rangeLen-1].highKey; + if ( highKey > maxKey ) + maxKey = highKey; + } + } + return maxKey; +} + +/* Generate the code for an fsm. Assumes parseData is set up properly. Called + * by parser code. */ +void CodeGenData::prepareMachine() +{ + if ( hasBeenPrepared ) + return; + hasBeenPrepared = true; + + /* Do this before distributing transitions out to singles and defaults + * makes life easier. */ + Key maxKey = findMaxKey(); + + redFsm->assignActionLocs(); + + /* Order the states. */ + redFsm->depthFirstOrdering(); + + if ( codeStyle == GenGoto || codeStyle == GenFGoto || + codeStyle == GenIpGoto || codeStyle == GenSplit ) + { + /* For goto driven machines we can keep the original depth + * first ordering because it's ok if the state ids are not + * sequential. Split the the ids by final state status. */ + redFsm->sortStateIdsByFinal(); + } + else { + /* For table driven machines the location of the state is used to + * identify it so the states must be sorted by their final ids. + * Though having a deterministic ordering is important, + * specifically preserving the depth first ordering is not because + * states are stored in tables. */ + redFsm->sortStatesByFinal(); + redFsm->sequentialStateIds(); + } + + /* Find the first final state. This is the final state with the lowest + * id. */ + redFsm->findFirstFinState(); + + /* Choose default transitions and the single transition. */ + redFsm->chooseDefaultSpan(); + + /* Maybe do flat expand, otherwise choose single. */ + if ( codeStyle == GenFlat || codeStyle == GenFFlat ) + redFsm->makeFlat(); + else + redFsm->chooseSingle(); + + /* If any errors have occured in the input file then don't write anything. 
*/ + if ( gblErrorCount > 0 ) + return; + + if ( codeStyle == GenSplit ) + redFsm->partitionFsm( numSplitPartitions ); + + if ( codeStyle == GenIpGoto || codeStyle == GenSplit ) + redFsm->setInTrans(); + + /* Make a code generator that will output the header/code. */ + if ( codeGen == 0 ) + makeCodeGen(); + codeGen->redFsm = redFsm; + + /* Anlayze Machine will find the final action reference counts, among + * other things. We will use these in reporting the usage + * of fsm directives in action code. */ + codeGen->analyzeMachine(); + codeGen->maxKey = maxKey; +} + +void CodeGenData::generateGraphviz() +{ + /* Do ordering and choose state ids. */ + redFsm->depthFirstOrdering(); + redFsm->sequentialStateIds(); + + /* For dot file generation we want to pick default transitions. */ + redFsm->chooseDefaultSpan(); + + /* Make the generator. */ + GraphvizDotGen dotGen( fsmName, this, redFsm, *outStream ); + + /* Write out with it. */ + dotGen.writeDotFile(); +} + +void CodeGenData::generateCode() +{ + if ( writeOps & WO_NOEND ) + hasEnd = false; + + if ( writeOps & WO_NOERROR ) + writeErr = false; + + if ( writeOps & WO_NOPREFIX ) + dataPrefix = false; + + if ( writeOps & WO_NOFF ) + writeFirstFinal = false; + + if ( writeData || writeInit || writeExec || writeEOF ) { + prepareMachine(); + + /* Force a newline. */ + *outStream << "\n"; + genLineDirective( *outStream ); + } + + + if ( writeExec ) { + /* Must set labels immediately before writing because we may depend + * on the noend write option. 
*/ + codeGen->setLabelsNeeded(); + } + + if ( writeData ) + codeGen->writeOutData(); + + if ( writeInit ) + codeGen->writeOutInit(); + + if ( writeExec ) + codeGen->writeOutExec(); + + if ( writeEOF ) + codeGen->writeOutEOF(); +} + +void CodeGenData::generate() +{ + if ( redFsm != 0 ) { + if ( outputFormat == OutCode ) + generateCode(); + else if ( outputFormat == OutGraphvizDot && !graphvizDone ) { + graphvizDone = true; + generateGraphviz(); + } + } +} + +void lineDirective( ostream &out, char *fileName, int line ) +{ + if ( hostLangType != JavaCode ) { + /* Write the preprocessor line info for to the input file. */ + out << "#line " << line << " \""; + for ( char *pc = fileName; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + out << "\\\\"; + else + out << *pc; + } + out << "\"\n"; + } +} + +void genLineDirective( ostream &out ) +{ + lineDirective( out, outputFileName, outFilter->line + 1 ); +} diff --git a/rlcodegen/gendata.h b/rlcodegen/gendata.h new file mode 100644 index 0000000..69d4243 --- /dev/null +++ b/rlcodegen/gendata.h @@ -0,0 +1,159 @@ +/* + * Copyright 2005-2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * 
+ *  You should have received a copy of the GNU General Public License
+ *  along with Ragel; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
+ */
+
+#ifndef _GENDATA_H
+#define _GENDATA_H
+
+#include <iostream>
+#include "redfsm.h"
+#include "fsmcodegen.h"
+#include "common.h"
+
+struct NameInst;
+typedef DList<Action> ActionList;
+
+typedef unsigned long ulong;
+
+/* Map from a name (char*) to its CodeGenData. */
+typedef AvlMap<char *, CodeGenData*, CmpStr> CodeGenMap;
+typedef AvlMapEl<char *, CodeGenData*> CodeGenMapEl;
+
+/* Bit flags for CodeGenData::writeOps. When set, generateCode() turns off
+ * hasEnd, writeErr, dataPrefix and writeFirstFinal respectively. */
+#define WO_NOEND    0x01
+#define WO_NOERROR  0x02
+#define WO_NOPREFIX 0x04
+#define WO_NOFF     0x08
+
+/* Holds one machine as it is collected, together with the options that
+ * control what gets written for it and the code generator that writes it. */
+struct CodeGenData
+{
+	/* All pointers start out null, the write requests start out false and
+	 * the write options (hasEnd, dataPrefix, writeFirstFinal, writeErr)
+	 * start out true. */
+	CodeGenData( char *fileName, char *fsmName, bool wantComplete )
+	:
+		fileName(fileName),
+		fsmName(fsmName), 
+		redFsm(0), 
+		allActions(0),
+		allActionTables(0),
+		allConditions(0),
+		allCondSpaces(0),
+		allStates(0),
+		nameIndex(0),
+		startState(0),
+		getKeyExpr(0),
+		accessExpr(0),
+		curStateExpr(0),
+		codeGen(0),
+		wantComplete(wantComplete),
+		writeOps(0),
+		writeData(false),
+		writeInit(false),
+		writeExec(false),
+		writeEOF(false),
+		hasLongestMatch(false),
+		hasEnd(true),
+		dataPrefix(true),
+		writeFirstFinal(true),
+		writeErr(true),
+		hasBeenPrepared(false)
+	{ }
+
+	/* 
+	 * Collecting the machine.
+	 */
+
+	char *fileName;
+	char *fsmName;
+	RedFsmAp *redFsm;
+	Action *allActions;
+	RedAction *allActionTables;
+	Condition *allConditions;
+	CondSpace *allCondSpaces;
+	RedStateAp *allStates;
+	NameInst **nameIndex;
+	int startState;
+	ActionList actionList;
+	ConditionList conditionList;
+	CondSpaceList condSpaceList;
+	InlineList *getKeyExpr;
+	InlineList *accessExpr;
+	InlineList *curStateExpr;
+	FsmCodeGen *codeGen;
+	KeyOps thisKeyOps;
+	bool wantComplete;
+	/* Bitwise OR of WO_* flags. */
+	int writeOps;
+	/* Which sections generateCode() should write. */
+	bool writeData;
+	bool writeInit;
+	bool writeExec;
+	bool writeEOF;
+	EntryIdVect entryPointIds;
+	EntryNameVect entryPointNames;
+	bool hasLongestMatch;
+
+	/* Write options.
 */
+	bool hasEnd;
+	bool dataPrefix;
+	bool writeFirstFinal;
+	bool writeErr;
+
+	/* Building the machine: lists, states, transitions and actions. */
+	void createMachine();
+	void initActionList( unsigned long length );
+	void newAction( int anum, char *name, int line, int col, InlineList *inlineList );
+	void initActionTableList( unsigned long length );
+	void initStateList( unsigned long length );
+	void setStartState( unsigned long startState );
+	void addEntryPoint( char *name, unsigned long entryState );
+	void setFinal( int snum );
+	void initTransList( int snum, unsigned long length );
+	void newTrans( int snum, int tnum, Key lowKey, Key highKey, 
+			long targ, long act );
+	void finishTransList( int snum );
+	void setStateActions( int snum, long toStateAction, 
+			long fromStateAction, long eofAction );
+	void finishMachine();
+	void setForcedErrorState()
+		{ redFsm->forcedErrorState = true; }
+
+	/* Building the condition spaces and the per-state conditions. */
+	void initCondSpaceList( ulong length );
+	void condSpaceItem( int cnum, long condActionId );
+	void newCondSpace( int cnum, int condSpaceId, Key baseKey );
+
+	void initStateCondList( int snum, ulong length );
+	void addStateCond( int snum, Key lowKey, Key highKey, long condNum );
+
+	CondSpace *findCondSpace( Key lowKey, Key highKey );
+	Condition *findCondition( Key key );
+
+	bool setAlphType( char *data );
+
+	void makeCodeGen();
+	void generateGraphviz();
+	void resolveTargetStates( InlineList *inlineList );
+	Key findMaxKey();
+
+	/* Output entry points. */
+	void generate();
+	void generateCode();
+	void prepareMachine();
+	bool hasBeenPrepared;
+};
+
+extern CodeGenData *cgd;
+
+/* Write a "#line" directive naming fileName and line to out. */
+void lineDirective( ostream &out, char *fileName, int line );
+/* Write a "#line" directive that refers to the output file itself. */
+void genLineDirective( ostream &out );
+
+#endif /* _GENDATA_H */
diff --git a/rlcodegen/gotocodegen.cpp b/rlcodegen/gotocodegen.cpp
new file mode 100644
index 0000000..2b764c1
--- /dev/null
+++ b/rlcodegen/gotocodegen.cpp
@@ -0,0 +1,754 @@
+/*
+ *  Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ *            2004 Eric Ocean <eric.ocean@ampede.com>
+ *            2005 Alan West <alan@alanz.com>
+ */
+
+/*  This file is part of
Ragel.
+ *
+ *  Ragel is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ * 
+ *  Ragel is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ * 
+ *  You should have received a copy of the GNU General Public License
+ *  along with Ragel; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
+ */
+
+#include "rlcodegen.h"
+#include "gotocodegen.h"
+#include "redfsm.h"
+#include "bstmap.h"
+#include "gendata.h"
+
+/* Emit the goto to take for a given transition: "goto tr<id>;" indented
+ * by level. No newline is written; the caller appends it. */
+std::ostream &GotoCodeGen::TRANS_GOTO( RedTransAp *trans, int level )
+{
+	out << TABS(level) << "goto tr" << trans->id << ";";
+	return out;
+}
+
+/* Emit one "case <actionId>: ... break;" for every action that is
+ * referenced as a to-state action, followed by a line directive. */
+std::ostream &GotoCodeGen::TO_STATE_ACTION_SWITCH()
+{
+	/* Walk the list of functions, printing the cases. */
+	for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) {
+		/* Write out referenced actions. */
+		if ( act->numToStateRefs > 0 ) {
+			/* Write the case label, the action and the case break. */
+			out << "\tcase " << act->actionId << ":\n";
+			ACTION( out, act, 0, false );
+			out << "\tbreak;\n";
+		}
+	}
+
+	genLineDirective( out );
+	return out;
+}
+
+/* Emit one "case <actionId>: ... break;" for every action that is
+ * referenced as a from-state action, followed by a line directive. */
+std::ostream &GotoCodeGen::FROM_STATE_ACTION_SWITCH()
+{
+	/* Walk the list of functions, printing the cases. */
+	for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) {
+		/* Write out referenced actions. */
+		if ( act->numFromStateRefs > 0 ) {
+			/* Write the case label, the action and the case break.
 */
+			out << "\tcase " << act->actionId << ":\n";
+			ACTION( out, act, 0, false );
+			out << "\tbreak;\n";
+		}
+	}
+
+	genLineDirective( out );
+	return out;
+}
+
+/* Emit one "case <actionId>: ... break;" for every action that is
+ * referenced as an EOF action, followed by a line directive. This is the
+ * only action switch that passes true as the last argument of ACTION. */
+std::ostream &GotoCodeGen::EOF_ACTION_SWITCH()
+{
+	/* Walk the list of functions, printing the cases. */
+	for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) {
+		/* Write out referenced actions. */
+		if ( act->numEofRefs > 0 ) {
+			/* Write the case label, the action and the case break. */
+			out << "\tcase " << act->actionId << ":\n";
+			ACTION( out, act, 0, true );
+			out << "\tbreak;\n";
+		}
+	}
+
+	genLineDirective( out );
+	return out;
+}
+
+/* Emit one "case <actionId>: ... break;" for every action that is
+ * referenced by a transition, followed by a line directive. */
+std::ostream &GotoCodeGen::ACTION_SWITCH()
+{
+	/* Walk the list of functions, printing the cases. */
+	for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) {
+		/* Write out referenced actions. */
+		if ( act->numTransRefs > 0 ) {
+			/* Write the case label, the action and the case break. */
+			out << "\tcase " << act->actionId << ":\n";
+			ACTION( out, act, 0, false );
+			out << "\tbreak;\n";
+		}
+	}
+
+	genLineDirective( out );
+	return out;
+}
+
+/* Emit the "case <id>:" label that opens the code for a state. */
+void GotoCodeGen::GOTO_HEADER( RedStateAp *state )
+{
+	/* Label the state. */
+	out << "case " << state->id << ":\n";
+}
+
+
+/* Emit the test for the single-key transitions of a state: an if when
+ * there is exactly one key, a switch when there are several, and nothing
+ * when there are none. */
+void GotoCodeGen::emitSingleSwitch( RedStateAp *state )
+{
+	/* Load up the singles. */
+	int numSingles = state->outSingle.length();
+	RedTransEl *data = state->outSingle.data;
+
+	if ( numSingles == 1 ) {
+		/* If there is a single single key then write it out as an if. */
+		out << "\tif ( " << GET_WIDE_KEY(state) << " == " << 
+				KEY(data[0].lowKey) << " )\n\t\t"; 
+
+		/* Virtual function for writing the target of the transition. */
+		TRANS_GOTO(data[0].value, 0) << "\n";
+	}
+	else if ( numSingles > 1 ) {
+		/* Write out single keys in a switch if there is more than one. */
+		out << "\tswitch( " << GET_WIDE_KEY(state) << " ) {\n";
+
+		/* Write out the single indices.
 */
+		for ( int j = 0; j < numSingles; j++ ) {
+			out << "\t\tcase " << KEY(data[j].lowKey) << ": ";
+			TRANS_GOTO(data[j].value, 0) << "\n";
+		}
+		
+		/* Emits a default case for D code. */
+		SWITCH_DEFAULT();
+
+		/* Close off the transition switch. */
+		out << "\t}\n";
+	}
+}
+
+/* Emit a nested if/else binary search over the range transitions
+ * state->outRange[low..high], indented by level. The element at mid is
+ * tested at this level and the halves on either side of it are emitted
+ * recursively one level deeper. Comparisons against a bound that equals
+ * the alphabet's minimum or maximum key are left out. */
+void GotoCodeGen::emitRangeBSearch( RedStateAp *state, int level, int low, int high )
+{
+	/* Get the mid position, staying on the lower end of the range. */
+	int mid = (low + high) >> 1;
+	RedTransEl *data = state->outRange.data;
+
+	/* Determine if we need to look higher or lower. */
+	bool anyLower = mid > low;
+	bool anyHigher = mid < high;
+
+	/* Determine if the keys at mid are the limits of the alphabet. */
+	bool limitLow = data[mid].lowKey == keyOps->minKey;
+	bool limitHigh = data[mid].highKey == keyOps->maxKey;
+
+	if ( anyLower && anyHigher ) {
+		/* Can go lower and higher than mid. */
+		out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " << 
+				KEY(data[mid].lowKey) << " ) {\n";
+		emitRangeBSearch( state, level+1, low, mid-1 );
+		out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " > " << 
+				KEY(data[mid].highKey) << " ) {\n";
+		emitRangeBSearch( state, level+1, mid+1, high );
+		out << TABS(level) << "} else\n";
+		TRANS_GOTO(data[mid].value, level+1) << "\n";
+	}
+	else if ( anyLower && !anyHigher ) {
+		/* Can go lower than mid but not higher. */
+		out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " << 
+				KEY(data[mid].lowKey) << " ) {\n";
+		emitRangeBSearch( state, level+1, low, mid-1 );
+
+		/* If the higher is the highest in the alphabet then there is no
+		 * sense testing it. */
+		if ( limitHigh ) {
+			out << TABS(level) << "} else\n";
+			TRANS_GOTO(data[mid].value, level+1) << "\n";
+		}
+		else {
+			out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " <= " << 
+					KEY(data[mid].highKey) << " )\n";
+			TRANS_GOTO(data[mid].value, level+1) << "\n";
+		}
+	}
+	else if ( !anyLower && anyHigher ) {
+		/* Can go higher than mid but not lower.
 */
+		out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " > " << 
+				KEY(data[mid].highKey) << " ) {\n";
+		emitRangeBSearch( state, level+1, mid+1, high );
+
+		/* If the lower end is the lowest in the alphabet then there is no
+		 * sense testing it. */
+		if ( limitLow ) {
+			out << TABS(level) << "} else\n";
+			TRANS_GOTO(data[mid].value, level+1) << "\n";
+		}
+		else {
+			out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " >= " << 
+					KEY(data[mid].lowKey) << " )\n";
+			TRANS_GOTO(data[mid].value, level+1) << "\n";
+		}
+	}
+	else {
+		/* Cannot go higher or lower than mid. It's mid or bust. What
+		 * tests to do depends on limits of alphabet. */
+		if ( !limitLow && !limitHigh ) {
+			/* Both bounds need testing. */
+			out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " << 
+					GET_WIDE_KEY(state) << " && " << GET_WIDE_KEY(state) << " <= " << 
+					KEY(data[mid].highKey) << " )\n";
+			TRANS_GOTO(data[mid].value, level+1) << "\n";
+		}
+		else if ( limitLow && !limitHigh ) {
+			/* Only the upper bound needs testing. */
+			out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " <= " << 
+					KEY(data[mid].highKey) << " )\n";
+			TRANS_GOTO(data[mid].value, level+1) << "\n";
+		}
+		else if ( !limitLow && limitHigh ) {
+			/* Only the lower bound needs testing. */
+			out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " << 
+					GET_WIDE_KEY(state) << " )\n";
+			TRANS_GOTO(data[mid].value, level+1) << "\n";
+		}
+		else {
+			/* Both high and low are at the limit. No tests to do. */
+			TRANS_GOTO(data[mid].value, level+1) << "\n";
+		}
+	}
+}
+
+/* Emit the case for the error state. Marks the _out label as used. */
+void GotoCodeGen::STATE_GOTO_ERROR()
+{
+	/* Label the state and bail immediately.
 */
+	outLabelUsed = true;
+	RedStateAp *state = redFsm->errState;
+	out << "case " << state->id << ":\n";
+	out << " goto _out;\n";
+}
+
+/* Emit the code that computes _widec for one state condition: the key is
+ * rebased from the alphabet's minimum key onto the condition space's base
+ * key, then for each condition in the space that holds, the alphabet size
+ * shifted left by the condition's position is added. */
+void GotoCodeGen::COND_TRANSLATE( StateCond *stateCond, int level )
+{
+	CondSpace *condSpace = stateCond->condSpace;
+	out << TABS(level) << "_widec = " << CAST(WIDE_ALPH_TYPE()) << "(" <<
+			KEY(condSpace->baseKey) << " + (" << GET_KEY() << 
+			" - " << KEY(keyOps->minKey) << "));\n";
+
+	for ( CondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) {
+		out << TABS(level) << "if ( ";
+		CONDITION( out, *csi );
+		Size condValOffset = ((1 << csi.pos()) * keyOps->alphSize());
+		out << " ) _widec += " << condValOffset << ";\n";
+	}
+}
+
+/* Emit a nested if/else binary search over the state conditions
+ * state->stateCondVect[low..high], indented by level. It has the same
+ * shape as emitRangeBSearch, but tests the plain key and ends each match
+ * in a COND_TRANSLATE block rather than a goto. */
+void GotoCodeGen::emitCondBSearch( RedStateAp *state, int level, int low, int high )
+{
+	/* Get the mid position, staying on the lower end of the range. */
+	int mid = (low + high) >> 1;
+	StateCond **data = state->stateCondVect.data;
+
+	/* Determine if we need to look higher or lower. */
+	bool anyLower = mid > low;
+	bool anyHigher = mid < high;
+
+	/* Determine if the keys at mid are the limits of the alphabet. */
+	bool limitLow = data[mid]->lowKey == keyOps->minKey;
+	bool limitHigh = data[mid]->highKey == keyOps->maxKey;
+
+	if ( anyLower && anyHigher ) {
+		/* Can go lower and higher than mid. */
+		out << TABS(level) << "if ( " << GET_KEY() << " < " << 
+				KEY(data[mid]->lowKey) << " ) {\n";
+		emitCondBSearch( state, level+1, low, mid-1 );
+		out << TABS(level) << "} else if ( " << GET_KEY() << " > " << 
+				KEY(data[mid]->highKey) << " ) {\n";
+		emitCondBSearch( state, level+1, mid+1, high );
+		out << TABS(level) << "} else {\n";
+		COND_TRANSLATE(data[mid], level+1);
+		out << TABS(level) << "}\n";
+	}
+	else if ( anyLower && !anyHigher ) {
+		/* Can go lower than mid but not higher.
 */
+		out << TABS(level) << "if ( " << GET_KEY() << " < " << 
+				KEY(data[mid]->lowKey) << " ) {\n";
+		emitCondBSearch( state, level+1, low, mid-1 );
+
+		/* If the higher is the highest in the alphabet then there is no
+		 * sense testing it. */
+		if ( limitHigh ) {
+			out << TABS(level) << "} else {\n";
+			COND_TRANSLATE(data[mid], level+1);
+			out << TABS(level) << "}\n";
+		}
+		else {
+			out << TABS(level) << "} else if ( " << GET_KEY() << " <= " << 
+					KEY(data[mid]->highKey) << " ) {\n";
+			COND_TRANSLATE(data[mid], level+1);
+			out << TABS(level) << "}\n";
+		}
+	}
+	else if ( !anyLower && anyHigher ) {
+		/* Can go higher than mid but not lower. */
+		out << TABS(level) << "if ( " << GET_KEY() << " > " << 
+				KEY(data[mid]->highKey) << " ) {\n";
+		emitCondBSearch( state, level+1, mid+1, high );
+
+		/* If the lower end is the lowest in the alphabet then there is no
+		 * sense testing it. */
+		if ( limitLow ) {
+			out << TABS(level) << "} else {\n";
+			COND_TRANSLATE(data[mid], level+1);
+			out << TABS(level) << "}\n";
+		}
+		else {
+			out << TABS(level) << "} else if ( " << GET_KEY() << " >= " << 
+					KEY(data[mid]->lowKey) << " ) {\n";
+			COND_TRANSLATE(data[mid], level+1);
+			out << TABS(level) << "}\n";
+		}
+	}
+	else {
+		/* Cannot go higher or lower than mid. It's mid or bust. What
+		 * tests to do depends on limits of alphabet.
*/ + if ( !limitLow && !limitHigh ) { + out << TABS(level) << "if ( " << KEY(data[mid]->lowKey) << " <= " << + GET_KEY() << " && " << GET_KEY() << " <= " << + KEY(data[mid]->highKey) << " ) {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else if ( limitLow && !limitHigh ) { + out << TABS(level) << "if ( " << GET_KEY() << " <= " << + KEY(data[mid]->highKey) << " ) {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else if ( !limitLow && limitHigh ) { + out << TABS(level) << "if ( " << KEY(data[mid]->lowKey) << " <= " << + GET_KEY() << " )\n {"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else { + /* Both high and low are at the limit. No tests to do. */ + COND_TRANSLATE(data[mid], level); + } + } +} + +std::ostream &GotoCodeGen::STATE_GOTOS() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st == redFsm->errState ) + STATE_GOTO_ERROR(); + else { + /* Writing code above state gotos. */ + GOTO_HEADER( st ); + + if ( st->stateCondVect.length() > 0 ) { + out << " _widec = " << GET_KEY() << ";\n"; + emitCondBSearch( st, 1, 0, st->stateCondVect.length() - 1 ); + } + + /* Try singles. */ + if ( st->outSingle.length() > 0 ) + emitSingleSwitch( st ); + + /* Default case is to binary search for the ranges, if that fails then */ + if ( st->outRange.length() > 0 ) + emitRangeBSearch( st, 1, 0, st->outRange.length() - 1 ); + + /* Write the default transition. */ + TRANS_GOTO( st->defTrans, 1 ) << "\n"; + } + } + return out; +} + +std::ostream &GotoCodeGen::TRANSITIONS() +{ + /* Emit any transitions that have functions and that go to + * this state. */ + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + /* Write the label for the transition so it can be jumped to. */ + out << " tr" << trans->id << ": "; + + /* Destination state. 
 */
+		/* Save the current state in _ps first when the action refers to it. */
+		if ( trans->action != 0 && trans->action->anyCurStateRef() )
+			out << "_ps = " << CS() << ";";
+		out << CS() << " = " << trans->targ->id << "; ";
+
+		if ( trans->action != 0 ) {
+			/* Write out the transition func. */
+			out << "goto f" << trans->action->actListId << ";\n";
+		}
+		else {
+			/* No code to execute, just loop around. */
+			out << "goto _again;\n";
+		}
+	}
+	return out;
+}
+
+/* Emit one "f<actListId>:" label per action table that is referenced by a
+ * transition, followed by the shared execFuncs loop that runs the actions
+ * _acts points at and then jumps to _again. */
+std::ostream &GotoCodeGen::EXEC_FUNCS()
+{
+	/* Make labels that set acts and jump to execFuncs. Loop func indices. */
+	for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+		if ( redAct->numTransRefs > 0 ) {
+			out << " f" << redAct->actListId << ": " <<
+				"_acts = " << ARR_OFF(A(), itoa( redAct->location+1 ) ) << ";"
+				" goto execFuncs;\n";
+		}
+	}
+
+	out <<
+		"\n"
+		"execFuncs:\n"
+		" _nacts = *_acts++;\n"
+		" while ( _nacts-- > 0 ) {\n"
+		" switch ( *_acts++ ) {\n";
+		ACTION_SWITCH();
+		SWITCH_DEFAULT() <<
+		" }\n"
+		" }\n"
+		" goto _again;\n";
+	return out;
+}
+
+/* Value written for a state's to-state action: the action table's
+ * location plus one, or zero when the state has no to-state action. */
+unsigned int GotoCodeGen::TO_STATE_ACTION( RedStateAp *state )
+{
+	int act = 0;
+	if ( state->toStateAction != 0 )
+		act = state->toStateAction->location+1;
+	return act;
+}
+
+/* Value written for a state's from-state action: the action table's
+ * location plus one, or zero when the state has no from-state action. */
+unsigned int GotoCodeGen::FROM_STATE_ACTION( RedStateAp *state )
+{
+	int act = 0;
+	if ( state->fromStateAction != 0 )
+		act = state->fromStateAction->location+1;
+	return act;
+}
+
+/* Value written for a state's EOF action: the action table's location
+ * plus one, or zero when the state has no EOF action. */
+unsigned int GotoCodeGen::EOF_ACTION( RedStateAp *state )
+{
+	int act = 0;
+	if ( state->eofAction != 0 )
+		act = state->eofAction->location+1;
+	return act;
+}
+
+/* Write the comma separated to-state action values, indexed by state id. */
+std::ostream &GotoCodeGen::TO_STATE_ACTIONS()
+{
+	/* One zero-initialized slot per state in the state list. */
+	int numStates = redFsm->stateList.length();
+	unsigned int *vals = new unsigned int[numStates];
+	memset( vals, 0, sizeof(unsigned int)*numStates );
+
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+		vals[st->id] = TO_STATE_ACTION(st);
+
+	/* NOTE(review): vals holds numStates entries but this loop runs to
+	 * redFsm->nextStateId. Presumably the two are equal once ids have
+	 * been assigned sequentially -- confirm. */
+	out << "\t";
+	for ( int st = 0; st < redFsm->nextStateId; st++ ) {
+		/* Write the to-state action value for this state id.
 */
+		out << vals[st];
+		if ( st < numStates-1 ) {
+			out << ", ";
+			/* Break the line after every IALL values. */
+			if ( (st+1) % IALL == 0 )
+				out << "\n\t";
+		}
+	}
+	out << "\n";
+	delete[] vals;
+	return out;
+}
+
+/* Write the comma separated from-state action values, indexed by state id. */
+std::ostream &GotoCodeGen::FROM_STATE_ACTIONS()
+{
+	/* One zero-initialized slot per state in the state list. */
+	int numStates = redFsm->stateList.length();
+	unsigned int *vals = new unsigned int[numStates];
+	memset( vals, 0, sizeof(unsigned int)*numStates );
+
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+		vals[st->id] = FROM_STATE_ACTION(st);
+
+	/* NOTE(review): vals holds numStates entries but this loop runs to
+	 * redFsm->nextStateId. Presumably the two are equal once ids have
+	 * been assigned sequentially -- confirm. */
+	out << "\t";
+	for ( int st = 0; st < redFsm->nextStateId; st++ ) {
+		/* Write the from-state action value for this state id. */
+		out << vals[st];
+		if ( st < numStates-1 ) {
+			out << ", ";
+			/* Break the line after every IALL values. */
+			if ( (st+1) % IALL == 0 )
+				out << "\n\t";
+		}
+	}
+	out << "\n";
+	delete[] vals;
+	return out;
+}
+
+/* Write the comma separated EOF action values, indexed by state id. */
+std::ostream &GotoCodeGen::EOF_ACTIONS()
+{
+	/* One zero-initialized slot per state in the state list. */
+	int numStates = redFsm->stateList.length();
+	unsigned int *vals = new unsigned int[numStates];
+	memset( vals, 0, sizeof(unsigned int)*numStates );
+
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+		vals[st->id] = EOF_ACTION(st);
+
+	/* NOTE(review): vals holds numStates entries but this loop runs to
+	 * redFsm->nextStateId. Presumably the two are equal once ids have
+	 * been assigned sequentially -- confirm. */
+	out << "\t";
+	for ( int st = 0; st < redFsm->nextStateId; st++ ) {
+		/* Write any eof action. */
+		out << vals[st];
+		if ( st < numStates-1 ) {
+			out << ", ";
+			/* Break the line after every IALL values. */
+			if ( (st+1) % IALL == 0 )
+				out << "\n\t";
+		}
+	}
+	out << "\n";
+	delete[] vals;
+	return out;
+}
+
+/* Emit a "case <id>: goto f<actListId>;" line for every state that has an
+ * EOF action. */
+std::ostream &GotoCodeGen::FINISH_CASES()
+{
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		/* States that have an eof action need a case. */
+		if ( st->eofAction != 0 ) {
+			/* Write the case label. */
+			out << "\t\tcase " << st->id << ": ";
+
+			/* Write the goto func.
 */
+			out << "goto f" << st->eofAction->actListId << ";\n";
+		}
+	}
+	
+	return out;
+}
+
+/* Emit a jump to a fixed state: set the current state to gotoDest and go
+ * to _again. inFinish is not used here. */
+void GotoCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish )
+{
+	ret << "{" << CS() << " = " << gotoDest << "; " << 
+			CTRL_FLOW() << "goto _again;}";
+}
+
+/* Emit a jump to a computed state: set the current state to the
+ * expression in ilItem's children and go to _again. */
+void GotoCodeGen::GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+	ret << "{" << CS() << " = (";
+	INLINE_LIST( ret, ilItem->children, 0, inFinish );
+	ret << "); " << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Emit the expression for the state being left, which is held in _ps. */
+void GotoCodeGen::CURS( ostream &ret, bool inFinish )
+{
+	ret << "(_ps)";
+}
+
+/* Emit the expression for the target state, which is the current state
+ * variable. inFinish and targState are not used here. */
+void GotoCodeGen::TARGS( ostream &ret, bool inFinish, int targState )
+{
+	ret << "(" << CS() << ")";
+}
+
+/* Emit an assignment of a fixed state to the current state variable,
+ * without any jump. */
+void GotoCodeGen::NEXT( ostream &ret, int nextDest, bool inFinish )
+{
+	ret << CS() << " = " << nextDest << ";";
+}
+
+/* Emit an assignment of a computed state to the current state variable,
+ * without any jump. */
+void GotoCodeGen::NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+	ret << CS() << " = (";
+	INLINE_LIST( ret, ilItem->children, 0, inFinish );
+	ret << ");";
+}
+
+/* Emit a call to a fixed state: push the current state onto the stack,
+ * set the current state to callDest and go to _again. */
+void GotoCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish )
+{
+	ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; " << CS() << " = " << 
+			callDest << "; " << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Emit a call to a computed state: push the current state onto the stack,
+ * set the current state to the expression and go to _again. */
+void GotoCodeGen::CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish )
+{
+	ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; " << CS() << " = (";
+	INLINE_LIST( ret, ilItem->children, targState, inFinish );
+	ret << "); " << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Emit a return: pop the stack into the current state and go to _again. */
+void GotoCodeGen::RET( ostream &ret, bool inFinish )
+{
+	ret << "{" << CS() << " = " << STACK() << "[--" << TOP() << "]; " << 
+			CTRL_FLOW() << "goto _again;}";
+}
+
+/* Emit a jump to _out and record that the _out label is needed. */
+void GotoCodeGen::BREAK( ostream &ret, int targState )
+{
+	outLabelUsed = true;
+	ret << CTRL_FLOW() << "goto _out;";
+}
+
+/* Write the static data of the machine: the start state constant, the
+ * first-final and error state constants when those write options are on,
+ * and the action arrays that are in use. */
+void GotoCodeGen::writeOutData()
+{
+	out <<
+		"static const int " << START() << " = " << START_STATE_ID() << ";\n"
+		"\n";
+
+	if ( cgd->writeFirstFinal ) {
+		out <<
+			"static const int " << FIRST_FINAL()
 << " = " << FIRST_FINAL_STATE() << ";\n"
+			"\n";
+	}
+
+	if ( cgd->writeErr ) {
+		out <<
+			"static const int " << ERROR() << " = " << ERROR_STATE() << ";\n"
+			"\n";
+	}
+
+	/* The actions array itself. */
+	if ( anyActions() ) {
+		OPEN_ARRAY( ARRAY_TYPE(maxActArrItem), A() );
+		ACTIONS_ARRAY();
+		CLOSE_ARRAY() <<
+		"\n";
+	}
+
+	/* The per-state to-state, from-state and EOF action values. Each
+	 * array is only written when such actions exist. */
+	if ( anyToStateActions() ) {
+		OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), TSA() );
+		TO_STATE_ACTIONS();
+		CLOSE_ARRAY() <<
+		"\n";
+	}
+
+	if ( anyFromStateActions() ) {
+		OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), FSA() );
+		FROM_STATE_ACTIONS();
+		CLOSE_ARRAY() <<
+		"\n";
+	}
+
+	if ( anyEofActions() ) {
+		OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), EA() );
+		EOF_ACTIONS();
+		CLOSE_ARRAY() <<
+		"\n";
+	}
+}
+
+/* Write the execution block of the machine: local declarations, the
+ * _resume label with the from-state actions, the state switch, the
+ * transition labels, the action execution loop, and the _again label with
+ * the to-state actions and the advance of the input pointer. */
+void GotoCodeGen::writeOutExec()
+{
+	/* Set again by the code written below whenever it jumps to _out. */
+	outLabelUsed = false;
+
+	out << " {\n";
+
+	if ( anyRegCurStateRef() )
+		out << " int _ps = 0;\n";
+
+	if ( anyToStateActions() || anyRegActions() || anyFromStateActions() ) {
+		out << 
+			" " << PTR_CONST() << ARRAY_TYPE(maxActArrItem) << POINTER() << "_acts;\n"
+			" " << UINT() << " _nacts;\n";
+	}
+
+	if ( anyConditions() )
+		out << " " << WIDE_ALPH_TYPE() << " _widec;\n";
+
+	out << "\n";
+
+	/* When the machine has an end pointer, leave right away if there is
+	 * no input. */
+	if ( cgd->hasEnd ) {
+		outLabelUsed = true;
+		out << 
+			" if ( " << P() << " == " << PE() << " )\n"
+			" goto _out;\n";
+	}
+
+	out << "_resume:\n";
+
+	if ( anyFromStateActions() ) {
+		out <<
+			" _acts = " << ARR_OFF( A(), FSA() + "[" + CS() + "]" ) << ";\n"
+			" _nacts = " << CAST(UINT()) << " *_acts++;\n"
+			" while ( _nacts-- > 0 ) {\n"
+			" switch ( *_acts++ ) {\n";
+			FROM_STATE_ACTION_SWITCH();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			" }\n"
+			"\n";
+	}
+
+	out <<
+		" switch ( " << CS() << " ) {\n";
+		STATE_GOTOS();
+		SWITCH_DEFAULT() <<
+		" }\n"
+		"\n";
+		TRANSITIONS() <<
+		"\n";
+
+	if ( anyRegActions() )
+		EXEC_FUNCS() << "\n";
+
+	out << "_again:\n";
+
+	if ( anyToStateActions() ) {
+		out <<
+			" _acts = " << ARR_OFF( A(), TSA() + "[" + CS() + "]" ) << ";\n"
+			" _nacts = " << CAST(UINT()) << " *_acts++;\n"
+			" while ( _nacts-- > 0 ) {\n"
+			" switch ( 
*_acts++ ) {\n";
+			TO_STATE_ACTION_SWITCH();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			" }\n"
+			"\n";
+	}
+
+	/* Advance the input pointer and loop. The comparison against the end
+	 * pointer is only written when the machine has one. */
+	if ( cgd->hasEnd ) {
+		out << 
+			" if ( ++" << P() << " != " << PE() << " )\n"
+			" goto _resume;\n";
+	}
+	else {
+		out << 
+			" " << P() << " += 1;\n"
+			" goto _resume;\n";
+	}
+
+	/* Only write the label if something jumps to it. */
+	if ( outLabelUsed )
+		out << " _out: {}\n";
+
+	out << " }\n";
+}
+
+/* Write the EOF block: a loop that runs the EOF actions of the current
+ * state. Nothing is written when the machine has no EOF actions. */
+void GotoCodeGen::writeOutEOF()
+{
+	if ( anyEofActions() ) {
+		out << 
+			" {\n"
+			" " << PTR_CONST() << ARRAY_TYPE(maxActArrItem) << POINTER() << "_acts = " << 
+					ARR_OFF( A(), EA() + "[" + CS() + "]" ) << ";\n"
+			" " << UINT() << " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+			" while ( _nacts-- > 0 ) {\n"
+			" switch ( *_acts++ ) {\n";
+			EOF_ACTION_SWITCH();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			" }\n"
+			" }\n"
+			"\n";
+	}
+}
diff --git a/rlcodegen/gotocodegen.h b/rlcodegen/gotocodegen.h
new file mode 100644
index 0000000..352e63e
--- /dev/null
+++ b/rlcodegen/gotocodegen.h
@@ -0,0 +1,106 @@
+/*
+ *  Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ *            2004 Eric Ocean <eric.ocean@ampede.com>
+ *            2005 Alan West <alan@alanz.com>
+ */
+
+/*  This file is part of Ragel.
+ *
+ *  Ragel is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ * 
+ *  Ragel is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ * 
+ *  You should have received a copy of the GNU General Public License
+ *  along with Ragel; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
+ */
+
+#ifndef _GOTOCODEGEN_H
+#define _GOTOCODEGEN_H
+
+#include <iostream>
+#include "fsmcodegen.h"
+
+/* Forwards.
 */
+struct CodeGenData;
+struct NameInst;
+struct RedTransAp;
+struct RedStateAp;
+struct StateCond;
+
+/*
+ * Goto driven fsm. Each state becomes a case in a switch on the current
+ * state and each transition becomes a labelled goto target.
+ */
+class GotoCodeGen : virtual public FsmCodeGen
+{
+public:
+	/* Case lists for the action switches, and the pieces of the exec block. */
+	std::ostream &TO_STATE_ACTION_SWITCH();
+	std::ostream &FROM_STATE_ACTION_SWITCH();
+	std::ostream &EOF_ACTION_SWITCH();
+	std::ostream &ACTION_SWITCH();
+	std::ostream &STATE_GOTOS();
+	std::ostream &TRANSITIONS();
+	std::ostream &EXEC_FUNCS();
+	std::ostream &FINISH_CASES();
+
+	/* Code emitted for the fsm control statements found in action code. */
+	void GOTO( ostream &ret, int gotoDest, bool inFinish );
+	void CALL( ostream &ret, int callDest, int targState, bool inFinish );
+	void NEXT( ostream &ret, int nextDest, bool inFinish );
+	void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+	void NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+	void CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish );
+	void CURS( ostream &ret, bool inFinish );
+	void TARGS( ostream &ret, bool inFinish, int targState );
+	void RET( ostream &ret, bool inFinish );
+	void BREAK( ostream &ret, int targState );
+
+	/* Value written into the per-state action arrays for one state. */
+	virtual unsigned int TO_STATE_ACTION( RedStateAp *state );
+	virtual unsigned int FROM_STATE_ACTION( RedStateAp *state );
+	virtual unsigned int EOF_ACTION( RedStateAp *state );
+
+	/* Contents of the per-state action arrays. */
+	std::ostream &TO_STATE_ACTIONS();
+	std::ostream &FROM_STATE_ACTIONS();
+	std::ostream &EOF_ACTIONS();
+
+	void COND_TRANSLATE( StateCond *stateCond, int level );
+	void emitCondBSearch( RedStateAp *state, int level, int low, int high );
+	void STATE_CONDS( RedStateAp *state, bool genDefault ); 
+
+	virtual std::ostream &TRANS_GOTO( RedTransAp *trans, int level );
+
+	void emitSingleSwitch( RedStateAp *state );
+	void emitRangeBSearch( RedStateAp *state, int level, int low, int high );
+
+	/* Called from STATE_GOTOS just before writing the gotos */
+	virtual void GOTO_HEADER( RedStateAp *state );
+	virtual void STATE_GOTO_ERROR();
+
+	/* Entry points for writing the data, EOF and exec sections. */
+	virtual void writeOutData();
+	virtual void writeOutEOF();
+	virtual void writeOutExec();
+};
+
+/*
+ * class CGotoCodeGen
+ *
+ * The goto driven fsm combined with CCodeGen.
+ */
+struct CGotoCodeGen
+	: public GotoCodeGen, public CCodeGen
+{
+};
+
+/*
+ * class DGotoCodeGen
+ *
+ * The goto driven fsm combined with DCodeGen.
+ */
+struct DGotoCodeGen
+	: public GotoCodeGen, public DCodeGen
+{
+};
+
+
+#endif /* _GOTOCODEGEN_H */
diff --git a/rlcodegen/gvdotgen.cpp b/rlcodegen/gvdotgen.cpp
new file mode 100644
index 0000000..6ea4a09
--- /dev/null
+++ b/rlcodegen/gvdotgen.cpp
@@ -0,0 +1,282 @@
+/*
+ *  Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/*  This file is part of Ragel.
+ *
+ *  Ragel is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ * 
+ *  Ragel is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ * 
+ *  You should have received a copy of the GNU General Public License
+ *  along with Ragel; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
+ */
+
+
+#include "rlcodegen.h"
+#include "gvdotgen.h"
+#include "gendata.h"
+#include "redfsm.h"
+
+using namespace std;
+
+/* Store the machine name, its data, the machine and the output stream. */
+GraphvizDotGen::GraphvizDotGen( char *fsmName, CodeGenData *cgd, 
+		RedFsmAp *redFsm, ostream &out )
+:
+	fsmName(fsmName),
+	cgd(cgd),
+	redFsm(redFsm),
+	out(out)
+{
+}
+
+/* Write one key. When printPrintables is set and the key is printable it
+ * is written as a character in single quotes, otherwise as a number that
+ * is signed or unsigned according to the alphabet. */
+std::ostream &GraphvizDotGen::KEY( Key key )
+{
+	if ( printPrintables && key.isPrintable() ) {
+		// Output values as characters, ensuring we escape the quote (")
+		// and backslash characters
+		char cVal = (char) key.getVal();
+		out << "'";
+		switch ( cVal ) {
+			case '"': case '\\':
+				out << "\\" << cVal;
+				break;
+			default:	
+				out << cVal;
+				break;
+		}
+		out << "'";
+	}
+	else {
+		if ( keyOps->isSigned )
+			out << key.getVal();
+		else
+			out << (unsigned long) key.getVal();
+	}
+
+	return out;
+}
+
+/* Write the action part of a transition label: " / " followed by the
+ * names of the from-state actions of fromState, the transition's own
+ * actions and the to-state actions of its target, separated by commas.
+ * Nothing is written when none of the three are present. */
+std::ostream &GraphvizDotGen::TRANS_ACTION( RedStateAp *fromState, RedTransAp *trans )
+{
+	/* Collect the action tables that are present, in the order listed. */
+	int n = 0;
+	RedAction *actions[3];
+
+	if ( fromState->fromStateAction != 0 )
+		actions[n++] = fromState->fromStateAction;
+	if ( trans->action != 0 )
+		actions[n++] = trans->action;
+	if ( trans->targ != 0 && trans->targ->toStateAction != 0 )
+		actions[n++] = trans->targ->toStateAction;
+
+	if ( n > 0 )
+		out << " / ";
+	
+	/* Loop the existing actions and write out what's there. */
+	for ( int a = 0; a < n; a++ ) {
+		for ( ActionTable::Iter actIt = actions[a]->key.first(); actIt.lte(); actIt++ ) {
+			Action *action = actIt->value;
+			out << action->nameOrLoc();
+			/* Separate every name except the very last one. */
+			if ( a < n-1 || !actIt.last() )
+				out << ", ";
+		}
+	}
+	return out;
+}
+
+/* Write " / " followed by the comma separated actions of one action
+ * table. An action with a name is written by name, one without as
+ * "line:col". */
+std::ostream &GraphvizDotGen::ACTION( RedAction *action )
+{
+	/* The action.
 */
+	out << " / ";
+	for ( ActionTable::Iter actIt = action->key.first(); actIt.lte(); actIt++ ) {
+		/* Note that this local hides the RedAction parameter of the same
+		 * name for the rest of the loop body. */
+		Action *action = actIt->value;
+		if ( action->name != 0 )
+			out << action->name;
+		else
+			out << action->loc.line << ":" << action->loc.col;
+		if ( !actIt.last() )
+			out << ", ";
+	}
+	return out;
+}
+
+/* Write the key or key range that a transition is taken on. A range whose
+ * low key lies above the alphabet's maximum key is treated as belonging
+ * to a condition space: it is mapped back onto the alphabet and followed
+ * by the list of conditions in parentheses, each one that is not set for
+ * the range being prefixed with "!". */
+std::ostream &GraphvizDotGen::ONCHAR( Key lowKey, Key highKey )
+{
+	if ( lowKey > keyOps->maxKey ) {
+		CondSpace *condSpace = cgd->findCondSpace( lowKey, highKey );
+		/* The number of whole alphabets between the base key and lowKey.
+		 * Its bits are tested per condition below. */
+		Key values = ( lowKey - condSpace->baseKey ) / keyOps->alphSize();
+
+		lowKey = keyOps->minKey + 
+			(lowKey - condSpace->baseKey - keyOps->alphSize() * values.getVal());
+		highKey = keyOps->minKey + 
+			(highKey - condSpace->baseKey - keyOps->alphSize() * values.getVal());
+		KEY( lowKey );
+		if ( lowKey != highKey ) {
+			out << "..";
+			KEY( highKey );
+		}
+		out << "(";
+
+		for ( CondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) {
+			bool set = values & (1 << csi.pos());
+			if ( !set )
+				out << "!";
+			out << (*csi)->nameOrLoc();
+			if ( !csi.last() )
+				out << ", ";
+		}
+		out << ")";
+	}
+	else {
+		/* Output the key. Possibly a range. */
+		KEY( lowKey );
+		if ( highKey != lowKey ) {
+			out << "..";
+			KEY( highKey );
+		}
+	}
+	return out;
+}
+
+/* Write the edges leaving one state: one edge per distinct range
+ * transition, labelled with all the keys it is taken on, and then the
+ * default transition if there is one. A transition without a target goes
+ * to the state's err_<id> node. */
+void GraphvizDotGen::writeTransList( RedStateAp *state )
+{
+	/* Build the set of unique transitions out of this state. */
+	RedTransSet stTransSet;
+	for ( RedTransList::Iter tel = state->outRange; tel.lte(); tel++ ) {
+		/* If we haven't seen the transitions before, the move forward
+		 * emitting all the transitions on the same character. */
+		if ( stTransSet.insert( tel->value ) ) {
+			/* Write out the from and to states. */
+			out << "\t" << state->id << " -> ";
+
+			if ( tel->value->targ == 0 )
+				out << "err_" << state->id;
+			else
+				out << tel->value->targ->id;
+
+			/* Begin the label. */
+			out << " [ label = \""; 
+			ONCHAR( tel->lowKey, tel->highKey );
+
+			/* Walk the transition list, finding the same.
*/
+			for ( RedTransList::Iter mtel = tel.next(); mtel.lte(); mtel++ ) {
+				if ( mtel->value == tel->value ) {
+					out << ", ";
+					ONCHAR( mtel->lowKey, mtel->highKey );
+				}
+			}
+
+			/* Write the action and close the transition. */
+			TRANS_ACTION( state, tel->value );
+			out << "\" ];\n";
+		}
+	}
+
+	/* Write the default transition. */
+	if ( state->defTrans != 0 ) {
+		/* Write out the from and to states. */
+		out << "\t" << state->id << " -> ";
+
+		if ( state->defTrans->targ == 0 )
+			out << "err_" << state->id;
+		else
+			out << state->defTrans->targ->id;
+
+		/* Begin the label. */
+		out << " [ label = \"DEF";
+
+		/* Write the action and close the transition. */
+		TRANS_ACTION( state, state->defTrans );
+		out << "\" ];\n";
+	}
+}
+/*
+ * Write the complete dot graph for the machine to `out`: the digraph
+ * header, the pseudo nodes (ENTRY, en_<id>, eof_<id>, err_<id>), the final
+ * states, every state's transitions, the edges from the entry pseudo nodes
+ * and the edges to the eof pseudo nodes, then the closing brace. The order
+ * matters because each "node [ ... ]" line sets the attributes of the nodes
+ * declared after it.
+ */
+void GraphvizDotGen::writeDotFile( )
+{
+	out <<
+		"digraph " << fsmName << " {\n"
+		" rankdir=LR;\n";
+
+	/* Define the pseudo states. Transitions will be done after the states
+	 * have been defined as either final or not final. */
+	out << " node [ shape = point ];\n";
+	out << " ENTRY;\n";
+
+	/* Pseudo states for entry points in the entry map. */
+	for ( EntryIdVect::Iter en = cgd->entryPointIds; en.lte(); en++ ) {
+		RedStateAp *state = cgd->allStates + *en;
+		out << " en_" << state->id << ";\n";
+	}
+
+	/* Pseudo states for final states with eof actions. */
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		if ( st->eofAction != 0 )
+			out << " eof_" << st->id << ";\n";
+	}
+
+	out << " node [ shape = circle, height = 0.2 ];\n";
+
+	/* Pseudo states for states whose default actions go to error. 
*/
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		bool needsErr = false;
+		if ( st->defTrans != 0 && st->defTrans->targ == 0 )
+			needsErr = true;
+		else {
+			for ( RedTransList::Iter tel = st->outRange; tel.lte(); tel++ ) {
+				if ( tel->value->targ == 0 ) {
+					needsErr = true;
+					break;
+				}
+			}
+		}
+
+		if ( needsErr )
+			out << " err_" << st->id << " [ label=\"\"];\n";
+	}
+
+	/* Attributes common to all nodes, plus double circle for final states. */
+	out << " node [ fixedsize = true, height = 0.65, shape = doublecircle ];\n";
+
+	/* List final states. They are declared while the doublecircle shape is
+	 * in effect. */
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		if ( st->isFinal )
+			out << " " << st->id << ";\n";
+	}
+
+	/* List transitions. States first mentioned from here on are plain
+	 * circles. */
+	out << " node [ shape = circle ];\n";
+
+	/* Walk the states. */
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+		writeTransList( st );
+
+	/* Transitions into the start state. */
+	out << " ENTRY -> " << redFsm->startState->id << " [ label = \"IN";
+	out << "\" ];\n";
+
+	/* Transitions into the entry points. The name is looked up by the
+	 * iterator position, so entryPointNames is read in parallel with
+	 * entryPointIds. */
+	for ( EntryIdVect::Iter en = cgd->entryPointIds; en.lte(); en++ ) {
+		RedStateAp *state = cgd->allStates + *en;
+		char *name = cgd->entryPointNames[en.pos()];
+		out << " en_" << state->id << " -> " << state->id <<
+				" [ label = \"" << name << "\" ];\n";
+	}
+
+	/* Out action transitions: one edge from each state that has an eof
+	 * action to its eof_<id> pseudo state, labelled "EOF / <actions>". */
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		if ( st->eofAction != 0 ) {
+			out << " " << st->id << " -> eof_" <<
+					st->id << " [ label = \"EOF";
+			ACTION( st->eofAction ) << "\" ];\n";
+		}
+	}
+
+	out <<
+		"}\n";
+}
diff --git a/rlcodegen/gvdotgen.h b/rlcodegen/gvdotgen.h
new file mode 100644
index 0000000..3dfcebc
--- /dev/null
+++ b/rlcodegen/gvdotgen.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel. 
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _GVDOTGEN_H
+#define _GVDOTGEN_H
+
+#include <iostream>
+#include "redfsm.h"
+
+/* Forwards. */
+struct CodeGenData;
+/*
+ * Writes a state machine as a Graphviz dot graph. The object only keeps the
+ * pointers and the stream reference handed to the constructor; it does not
+ * take ownership of any of them. The stream is held by reference, so it
+ * must stay alive for as long as the generator is used.
+ */
+class GraphvizDotGen
+{
+public:
+	GraphvizDotGen( char *fsmName, CodeGenData *cgd,
+			RedFsmAp *redFsm, std::ostream &out );
+
+	/* Print an fsm to out stream. writeDotFile writes the whole graph and
+	 * calls writeTransList once per state to write that state's edges. */
+	void writeTransList( RedStateAp *state );
+	void writeDotFile( );
+
+private:
+	/* Writing labels and actions. Each helper writes to `out` and returns
+	 * it so that calls can be chained with <<. */
+	std::ostream &ONCHAR( Key lowKey, Key highKey );
+	std::ostream &TRANS_ACTION( RedStateAp *fromState, RedTransAp *trans );
+	std::ostream &ACTION( RedAction *action );
+	std::ostream &KEY( Key key );
+
+	char *fsmName; /* written after "digraph" as the graph name */
+	CodeGenData *cgd; /* entry points, state array, condition spaces */
+	RedFsmAp *redFsm; /* state list and start state */
+	std::ostream &out; /* destination of all output */
+};
+
+
+#endif /* _GVDOTGEN_H */
diff --git a/rlcodegen/ipgotocodegen.cpp b/rlcodegen/ipgotocodegen.cpp
new file mode 100644
index 0000000..5100fdf
--- /dev/null
+++ b/rlcodegen/ipgotocodegen.cpp
@@ -0,0 +1,418 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Eric Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel. 
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlcodegen.h"
+#include "ipgotocodegen.h"
+#include "redfsm.h"
+#include "gendata.h"
+#include "bstmap.h"
+/*
+ * The functions up to BREAK write, into `ret`, the target-language text for
+ * one control-flow statement found inside an action. inFinish is accepted
+ * by all of them but is only forwarded to INLINE_LIST by the *_EXPR forms.
+ *
+ * GOTO: jump straight to the label of state gotoDest.
+ */
+void IpGotoCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish )
+{
+	ret << "{" << CTRL_FLOW() << "goto st" << gotoDest << ";}";
+}
+/*
+ * CALL: push targState on the stack, then jump to the label of callDest.
+ */
+void IpGotoCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish )
+{
+	ret << "{" << STACK() << "[" << TOP() << "++] = " << targState <<
+			"; " << CTRL_FLOW() << "goto st" << callDest << ";}";
+}
+/*
+ * RET: pop the stack into the current state and jump to _again.
+ */
+void IpGotoCodeGen::RET( ostream &ret, bool inFinish )
+{
+	ret << "{" << CS() << " = " << STACK() << "[--" << TOP() << "]; " <<
+			CTRL_FLOW() << "goto _again;}";
+}
+/*
+ * GOTO_EXPR: assign the expression in ilItem->children to the current state
+ * and jump to _again.
+ */
+void IpGotoCodeGen::GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+	ret << "{" << CS() << " = (";
+	INLINE_LIST( ret, ilItem->children, 0, inFinish );
+	ret << "); " << CTRL_FLOW() << "goto _again;}";
+}
+/*
+ * CALL_EXPR: push targState, assign the expression to the current state and
+ * jump to _again.
+ */
+void IpGotoCodeGen::CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish )
+{
+	ret << "{" << STACK() << "[" << TOP() << "++] = " << targState << "; " << CS() << " = (";
+	INLINE_LIST( ret, ilItem->children, 0, inFinish );
+	ret << "); " << CTRL_FLOW() << "goto _again;}";
+}
+/*
+ * NEXT: assign nextDest to the current state without jumping.
+ */
+void IpGotoCodeGen::NEXT( ostream &ret, int nextDest, bool inFinish )
+{
+	ret << CS() << " = " << nextDest << ";";
+}
+/*
+ * NEXT_EXPR: assign the expression to the current state without jumping.
+ */
+void IpGotoCodeGen::NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+	ret << CS() << " = (";
+	INLINE_LIST( ret, ilItem->children, 0, inFinish );
+	ret << ");";
+}
+/*
+ * CURS: the current state is read from the _ps variable.
+ */
+void IpGotoCodeGen::CURS( ostream &ret, bool inFinish )
+{
+	ret << "(_ps)";
+}
+/*
+ * TARGS: the target state is written as the literal targState.
+ */
+void IpGotoCodeGen::TARGS( ostream &ret, bool inFinish, int targState )
+{
+	ret << targState;
+}
+/*
+ * BREAK: jump to the _out<targState> exit label.
+ */
+void IpGotoCodeGen::BREAK( ostream &ret, int targState )
+{
+	ret << CTRL_FLOW() << "goto _out" << targState << ";";
+}
+/*
+ * Write the "tr<id>:" blocks for the transitions into `state` that have an
+ * action and need a label. Each block runs the transition's actions and
+ * then jumps either to _again (when an action contains a next statement)
+ * or directly to the target state's label. Returns true if at least one
+ * block was written.
+ */
+bool IpGotoCodeGen::IN_TRANS_ACTIONS( RedStateAp *state )
+{
+	bool anyWritten = false;
+
+	/* Emit any transitions that have actions and that go to this state. */
+	for ( int it = 0; it < state->numInTrans; it++ ) {
+		RedTransAp *trans = state->inTrans[it];
+		if ( trans->action != 0 && trans->labelNeeded ) {
+			/* Remember that we wrote an action so we know to write the
+			 * line directive for going back to the output. */
+			anyWritten = true;
+
+			/* Write the label for the transition so it can be jumped to. */
+			out << "tr" << trans->id << ":\n";
+
+			/* If the action contains a next, then we must preload the current
+			 * state since the action may or may not set it. */
+			if ( trans->action->anyNextStmt() )
+				out << " " << CS() << " = " << trans->targ->id << ";\n";
+
+			/* Write each action in the list. */
+			for ( ActionTable::Iter item = trans->action->key; item.lte(); item++ )
+				ACTION( out, item->value, trans->targ->id, false );
+
+			/* If the action contains a next then we need to reload, otherwise
+			 * jump directly to the target state. */
+			if ( trans->action->anyNextStmt() )
+				out << "\tgoto _again;\n";
+			else
+				out << "\tgoto st" << trans->targ->id << ";\n";
+		}
+	}
+
+	return anyWritten;
+}
+
+/* Called from GotoCodeGen::STATE_GOTOS just before writing the gotos for each
+ * state. 
*/
+void IpGotoCodeGen::GOTO_HEADER( RedStateAp *state )
+{
+	bool anyWritten = IN_TRANS_ACTIONS( state );
+
+	if ( state->labelNeeded )
+		out << "st" << state->id << ":\n";
+
+	if ( state->toStateAction != 0 ) {
+		/* Remember that we wrote an action. Write every action in the list. */
+		anyWritten = true;
+		for ( ActionTable::Iter item = state->toStateAction->key; item.lte(); item++ )
+			ACTION( out, item->value, state->id, false );
+	}
+
+	/* Advance and test buffer pos. This is only written for labelled
+	 * states; it sits between the "st<id>:" label and the "case <id>:"
+	 * label below, so a jump to the label advances first while the switch
+	 * case skips the advance. */
+	if ( state->labelNeeded ) {
+		if ( cgd->hasEnd ) {
+			out <<
+				" if ( ++" << P() << " == " << PE() << " )\n"
+				" goto _out" << state->id << ";\n";
+		}
+		else {
+			out <<
+				" " << P() << " += 1;\n";
+		}
+	}
+
+	/* Give the state a switch case. */
+	out << "case " << state->id << ":\n";
+
+	if ( state->fromStateAction != 0 ) {
+		/* Remember that we wrote an action. Write every action in the list. */
+		anyWritten = true;
+		for ( ActionTable::Iter item = state->fromStateAction->key; item.lte(); item++ )
+			ACTION( out, item->value, state->id, false );
+	}
+
+	if ( anyWritten )
+		genLineDirective( out );
+
+	/* Record the prev state if necessary. */
+	if ( state->anyRegCurStateRef() )
+		out << " _ps = " << state->id << ";\n";
+}
+/*
+ * Write the code for the error state: the incoming transition blocks, the
+ * state label if one is needed, and a jump to the state's _out label.
+ * redFsm->errState is dereferenced without a null check here.
+ */
+void IpGotoCodeGen::STATE_GOTO_ERROR()
+{
+	/* In the error state we need to emit some stuff that usually goes into
+	 * the header. */
+	RedStateAp *state = redFsm->errState;
+	bool anyWritten = IN_TRANS_ACTIONS( state );
+
+	/* No case label needed since we don't switch on the error state. */
+	if ( anyWritten )
+		genLineDirective( out );
+
+	if ( state->labelNeeded )
+		out << "st" << state->id << ":\n";
+
+	/* Break out here. */
+	out << " goto _out" << state->id << ";\n";
+}
+
+
+/* Emit the goto to take for a given transition. The statement is indented
+ * with TABS(level) and is not followed by a newline. */
+std::ostream &IpGotoCodeGen::TRANS_GOTO( RedTransAp *trans, int level )
+{
+	if ( trans->action != 0 ) {
+		/* Go to the transition which will go to the state. 
*/
+		out << TABS(level) << "goto tr" << trans->id << ";";
+	}
+	else {
+		/* Go directly to the target state. */
+		out << TABS(level) << "goto st" << trans->targ->id << ";";
+	}
+	return out;
+}
+/*
+ * Write one "_out<id>:" exit label for every state flagged outNeeded. Each
+ * label stores the state id in the current-state variable and jumps to the
+ * common _out label. Sets outLabelUsed when any label is written.
+ */
+std::ostream &IpGotoCodeGen::EXIT_STATES()
+{
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		if ( st->outNeeded ) {
+			outLabelUsed = true;
+			out << " _out" << st->id << ": " << CS() << " = " <<
+					st->id << "; goto _out; \n";
+		}
+	}
+	return out;
+}
+/*
+ * Write the cases of the _again switch: one "case <id>: goto st<id>;" line
+ * for every state in the machine.
+ */
+std::ostream &IpGotoCodeGen::AGAIN_CASES()
+{
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		out <<
+			" case " << st->id << ": goto st" << st->id << ";\n";
+	}
+	return out;
+}
+/*
+ * Write the cases of the EOF switch. States are first grouped by their eof
+ * action table (the ids are collected in the table's eofRefs set, which is
+ * allocated here on first use and not freed in this function). Each table
+ * with references then gets its "case" labels followed by the actions and a
+ * break.
+ */
+std::ostream &IpGotoCodeGen::FINISH_CASES()
+{
+	bool anyWritten = false;
+
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		if ( st->eofAction != 0 ) {
+			if ( st->eofAction->eofRefs == 0 )
+				st->eofAction->eofRefs = new IntSet;
+			st->eofAction->eofRefs->insert( st->id );
+		}
+	}
+
+	for ( ActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) {
+		if ( act->eofRefs != 0 ) {
+			for ( IntSet::Iter pst = *act->eofRefs; pst.lte(); pst++ )
+				out << " case " << *pst << ": \n";
+
+			/* Remember that we wrote a trans so we know to write the
+			 * line directive for going back to the output. */
+			anyWritten = true;
+
+			/* Write each action in the eof action list. */
+			for ( ActionTable::Iter item = act->key; item.lte(); item++ )
+				ACTION( out, item->value, STATE_ERR_STATE, true );
+			out << "\tbreak;\n";
+		}
+	}
+
+	if ( anyWritten )
+		genLineDirective( out );
+	return out;
+}
+/*
+ * Walk an inline list (and, recursively, the children of its items) and
+ * flag the target state of every Goto and Call item as needing a label.
+ */
+void IpGotoCodeGen::setLabelsNeeded( InlineList *inlineList )
+{
+	for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+		switch ( item->type ) {
+		case InlineItem::Goto: case InlineItem::Call: {
+			/* Mark the target as needing a label. 
*/
+			item->targState->labelNeeded = true;
+			break;
+		}
+		default: break;
+		}
+
+		if ( item->children != 0 )
+			setLabelsNeeded( item->children );
+	}
+}
+
+/* Set up the labelNeeded flag for each state, then derive the outNeeded
+ * flag (which states get an _out<id> exit label). */
+void IpGotoCodeGen::setLabelsNeeded()
+{
+	/* If we use the _again label, then we need the _again switch, which uses
+	 * all labels. */
+	if ( useAgainLabel() ) {
+		for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+			st->labelNeeded = true;
+	}
+	else {
+		/* Do not use all labels by default, init all labelNeeded vars to false. */
+		for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+			st->labelNeeded = false;
+
+		if ( redFsm->errState != 0 && anyLmSwitchError() )
+			redFsm->errState->labelNeeded = true;
+
+		/* Walk all transitions and set only those that have targs.
+		 * NOTE(review): targ is dereferenced without a null check here;
+		 * presumably every transition has a target at this point - confirm. */
+		for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) {
+			/* If there is no action with a next statement, then the label will be
+			 * needed. */
+			if ( trans->action == 0 || !trans->action->anyNextStmt() )
+				trans->targ->labelNeeded = true;
+
+			/* Need labels for states that have goto or calls in action code
+			 * invoked on characters (ie, not from out action code). */
+			if ( trans->action != 0 ) {
+				/* Loop the actions. */
+				for ( ActionTable::Iter act = trans->action->key; act.lte(); act++ ) {
+					/* Get the action and walk its tree. */
+					setLabelsNeeded( act->value->inlineList );
+				}
+			}
+		}
+	}
+
+	if ( cgd->hasEnd ) {
+		for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+			st->outNeeded = st->labelNeeded;
+	}
+	else {
+		if ( redFsm->errState != 0 )
+			redFsm->errState->outNeeded = true;
+
+		for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) {
+			/* Any state with a transition in that has a break will need an
+			 * out label. 
*/
+			if ( trans->action != 0 && trans->action->anyBreakStmt() )
+				trans->targ->outNeeded = true;
+		}
+	}
+}
+/*
+ * Write the constants of the machine: the start state always, the first
+ * final state when cgd->writeFirstFinal is set and the error state when
+ * cgd->writeErr is set. Each is a "static const int" followed by an empty
+ * line.
+ */
+void IpGotoCodeGen::writeOutData()
+{
+	out <<
+		"static const int " << START() << " = " << START_STATE_ID() << ";\n"
+		"\n";
+
+	if ( cgd->writeFirstFinal ) {
+		out <<
+			"static const int " << FIRST_FINAL() << " = " << FIRST_FINAL_STATE() << ";\n"
+			"\n";
+	}
+
+	if ( cgd->writeErr ) {
+		out <<
+			"static const int " << ERROR() << " = " << ERROR_STATE() << ";\n"
+			"\n";
+	}
+}
+/*
+ * Write the execute block. In order: local declarations, the empty-input
+ * test (when the machine has an end pointer), the optional _again switch
+ * with its advance step and the _resume label, the main switch on the
+ * current state holding every state's code, the per-state exit labels and
+ * finally the common _out label if anything referenced it.
+ */
+void IpGotoCodeGen::writeOutExec()
+{
+	outLabelUsed = false;
+
+	out << " {\n";
+
+	if ( anyRegCurStateRef() )
+		out << " int _ps = 0;\n";
+
+	if ( anyConditions() )
+		out << " " << WIDE_ALPH_TYPE() << " _widec;\n";
+
+	if ( cgd->hasEnd ) {
+		outLabelUsed = true;
+		out <<
+			" if ( " << P() << " == " << PE() << " )\n"
+			" goto _out;\n";
+	}
+
+	if ( useAgainLabel() ) {
+		out <<
+			" goto _resume;\n"
+			"\n"
+			"_again:\n"
+			" switch ( " << CS() << " ) {\n";
+			AGAIN_CASES() <<
+			" default: break;\n"
+			" }\n"
+			"\n";
+
+		if ( cgd->hasEnd ) {
+			outLabelUsed = true;
+			out <<
+				" if ( ++" << P() << " == " << PE() << " )\n"
+				" goto _out;\n";
+		}
+		else {
+			out <<
+				" " << P() << " += 1;\n";
+		}
+
+		out << "_resume:\n";
+	}
+
+	out <<
+		" switch ( " << CS() << " )\n {\n";
+		STATE_GOTOS();
+		SWITCH_DEFAULT() <<
+		" }\n";
+		EXIT_STATES() <<
+		"\n";
+
+	if ( outLabelUsed )
+		out << " _out: {}\n";
+
+	out <<
+		" }\n";
+}
+/*
+ * Write the EOF block: a switch on the current state whose cases come from
+ * FINISH_CASES(). Nothing is written when the machine has no eof actions.
+ */
+void IpGotoCodeGen::writeOutEOF()
+{
+	if ( anyEofActions() ) {
+		out <<
+			" {\n"
+			" switch ( " << CS() << " ) {\n";
+			FINISH_CASES();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			" }\n"
+			"\n";
+	}
+}
diff --git a/rlcodegen/ipgotocodegen.h b/rlcodegen/ipgotocodegen.h
new file mode 100644
index 0000000..25b64ee
--- /dev/null
+++ b/rlcodegen/ipgotocodegen.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Eric Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel. 
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _IPGCODEGEN_H
+#define _IPGCODEGEN_H
+
+#include <iostream>
+#include "gotocodegen.h"
+
+/* Forwards. */
+struct CodeGenData;
+
+/*
+ * class IpGotoCodeGen
+ *
+ * Goto-driven code generator built on GotoCodeGen. The members declared
+ * here are defined in ipgotocodegen.cpp.
+ */
+class IpGotoCodeGen : public GotoCodeGen
+{
+public:
+	/* Pieces of the execute and EOF blocks. Each writes to the generator's
+	 * output stream and returns it. */
+	std::ostream &EXIT_STATES();
+	std::ostream &TRANS_GOTO( RedTransAp *trans, int level );
+	std::ostream &FINISH_CASES();
+	std::ostream &AGAIN_CASES();
+
+	/* Code for the control-flow statements that appear inside actions,
+	 * written to `ret`. */
+	void GOTO( ostream &ret, int gotoDest, bool inFinish );
+	void CALL( ostream &ret, int callDest, int targState, bool inFinish );
+	void NEXT( ostream &ret, int nextDest, bool inFinish );
+	void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+	void NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+	void CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish );
+	void RET( ostream &ret, bool inFinish );
+	void CURS( ostream &ret, bool inFinish );
+	void TARGS( ostream &ret, bool inFinish, int targState );
+	void BREAK( ostream &ret, int targState );
+
+	/* Top-level writers for the data, EOF and execute sections. */
+	virtual void writeOutData();
+	virtual void writeOutEOF();
+	virtual void writeOutExec();
+
+protected:
+	/* True when the generated code needs the _again label and its switch:
+	 * some regular action returns, uses by-value control flow or contains a
+	 * next statement. */
+	bool useAgainLabel()
+		{ return anyRegActionRets() || anyRegActionByValControl() || anyRegNextStmt(); }
+
+	/* Called from GotoCodeGen::STATE_GOTOS just before writing the gotos for
+	 * each 
state. */
+	bool IN_TRANS_ACTIONS( RedStateAp *state );
+	void GOTO_HEADER( RedStateAp *state );
+	void STATE_GOTO_ERROR();
+
+	/* Set up labelNeeded flag for each state. The overload taking an
+	 * InlineList flags the targets of the gotos and calls found in that
+	 * list. */
+	void setLabelsNeeded( InlineList *inlineList );
+	void setLabelsNeeded();
+};
+
+
+/*
+ * class CIpGotoCodeGen
+ *
+ * IpGotoCodeGen combined with CCodeGen; adds no members of its own.
+ */
+struct CIpGotoCodeGen
+	: public IpGotoCodeGen, public CCodeGen
+{
+};
+
+/*
+ * class DIpGotoCodeGen
+ *
+ * IpGotoCodeGen combined with DCodeGen; adds no members of its own.
+ */
+struct DIpGotoCodeGen
+	: public IpGotoCodeGen, public DCodeGen
+{
+};
+
+
+#endif /* _IPGCODEGEN_H */
diff --git a/rlcodegen/javacodegen.cpp b/rlcodegen/javacodegen.cpp
new file mode 100644
index 0000000..f902620
--- /dev/null
+++ b/rlcodegen/javacodegen.cpp
@@ -0,0 +1,307 @@
+/*
+ * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "javacodegen.h" +#include "rlcodegen.h" +#include "tabcodegen.h" +#include "redfsm.h" +#include "gendata.h" + +void JavaTabCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish ) +{ + ret << "{" << CS() << " = " << gotoDest << "; " << + CTRL_FLOW() << "break _again;}"; +} + +void JavaTabCodeGen::GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ) +{ + ret << "{" << CS() << " = ("; + INLINE_LIST( ret, ilItem->children, 0, inFinish ); + ret << "); " << CTRL_FLOW() << "break _again;}"; +} + +void JavaTabCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; " << CS() << " = " << + callDest << "; " << CTRL_FLOW() << "break _again;}"; +} + +void JavaTabCodeGen::CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish ) +{ + ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; " << CS() << " = ("; + INLINE_LIST( ret, ilItem->children, targState, inFinish ); + ret << "); " << CTRL_FLOW() << "break _again;}"; +} + +void JavaTabCodeGen::RET( ostream &ret, bool inFinish ) +{ + ret << "{" << CS() << " = " << STACK() << "[--" << TOP() + << "]; " << CTRL_FLOW() << "break _again;}"; +} + +void JavaTabCodeGen::BREAK( ostream &ret, int targState ) +{ + ret << CTRL_FLOW() << "break _resume;"; +} + +void JavaTabCodeGen::COND_TRANSLATE() +{ + out << + " _widec = " << GET_KEY() << ";\n" + " _keys = " << CO() << "[" << CS() << "]*2\n;" + " _klen = " << CL() << "[" << CS() << "];\n" + " if ( _klen > 0 ) {\n" + " int _lower = _keys\n;" + " int _mid;\n" + " int _upper = _keys + (_klen<<1) - 2;\n" + " while (true) {\n" + " if ( _upper < _lower )\n" + " break;\n" + "\n" + " _mid = _lower + (((_upper-_lower) >> 1) & ~1);\n" + " if ( " << 
GET_WIDE_KEY() << " < " << CK() << "[_mid] )\n"
+		" _upper = _mid - 2;\n"
+		" else if ( " << GET_WIDE_KEY() << " > " << CK() << "[_mid] )\n"
+		" _lower = _mid + 2;\n"
+		" else {\n"
+		" switch ( " << C() << "[" << CO() << "[" << CS() << "]"
+		" + ((_mid - _keys)>>1)] ) {\n"
+		;
+
+	for ( CondSpaceList::Iter csi = cgd->condSpaceList; csi.lte(); csi++ ) {
+		CondSpace *condSpace = csi;
+		out << " case " << condSpace->condSpaceId << ": {\n";
+		out << TABS(2) << "_widec = " << KEY(condSpace->baseKey) <<
+				" + (" << GET_KEY() << " - " << KEY(keyOps->minKey) << ");\n";
+
+		for ( CondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { /* inner csi shadows the outer iterator */
+			out << TABS(2) << "if ( ";
+			CONDITION( out, *csi );
+			Size condValOffset = ((1 << csi.pos()) * keyOps->alphSize());
+			out << " ) _widec += " << condValOffset << ";\n";
+		}
+
+		out <<
+			" break;\n"
+			" }\n";
+	}
+
+	out <<
+		" }\n"
+		" break;\n"
+		" }\n"
+		" }\n"
+		" }\n"
+		"\n";
+}
+
+/*
+ * Write the Java code that finds the transition for the current key and
+ * leaves its index in _trans. The generated code is a labelled
+ * "do { } while (false)" block (_match) so that a hit can leave early with
+ * "break _match". It runs two binary searches: the first over _klen keys
+ * with a stride of one, the second over _klen low/high key pairs with a
+ * stride of two. When neither search hits, _trans has been advanced past
+ * both groups.
+ */
+void JavaTabCodeGen::LOCATE_TRANS()
+{
+	out <<
+		" _match: do {\n"
+		" _keys = " << KO() << "[" << CS() << "]" << ";\n"
+		" _trans = " << IO() << "[" << CS() << "];\n"
+		" _klen = " << SL() << "[" << CS() << "];\n"
+		" if ( _klen > 0 ) {\n"
+		" int _lower = _keys;\n"
+		" int _mid;\n"
+		" int _upper = _keys + _klen - 1;\n"
+		" while (true) {\n"
+		" if ( _upper < _lower )\n"
+		" break;\n"
+		"\n"
+		" _mid = _lower + ((_upper-_lower) >> 1);\n"
+		" if ( " << GET_WIDE_KEY() << " < " << K() << "[_mid] )\n"
+		" _upper = _mid - 1;\n"
+		" else if ( " << GET_WIDE_KEY() << " > " << K() << "[_mid] )\n"
+		" _lower = _mid + 1;\n"
+		" else {\n"
+		" _trans += (_mid - _keys);\n"
+		" break _match;\n"
+		" }\n"
+		" }\n"
+		" _keys += _klen;\n"
+		" _trans += _klen;\n"
+		" }\n"
+		"\n"
+		" _klen = " << RL() << "[" << CS() << "];\n"
+		" if ( _klen > 0 ) {\n"
+		" int _lower = _keys;\n"
+		" int _mid;\n"
+		" int _upper = _keys + (_klen<<1) - 2;\n"
+		" while (true) {\n"
+		" if ( _upper < _lower )\n"
+		" break;\n"
+		"\n"
+		" _mid = _lower + (((_upper-_lower) >> 1) & ~1);\n"
+		" if ( " << GET_WIDE_KEY() << " < " << K() << "[_mid] )\n"
+		" _upper = _mid - 2;\n"
+		" else if ( " << GET_WIDE_KEY() << " > " << K() << "[_mid+1] )\n"
+		" _lower = _mid + 2;\n"
+		" else {\n"
+		" _trans += ((_mid - _keys)>>1);\n"
+		" break _match;\n"
+		" }\n"
+		" }\n"
+		" _trans += _klen;\n"
+		" }\n"
+		" } while (false);\n"
+		"\n";
+}
+/*
+ * Write the Java execute block. The generated code declares its locals,
+ * optionally guards against empty input, then runs a "_resume: while (true)"
+ * loop whose body is a "_again: do { } while (false)" block. Inside that
+ * block come the error-state test, the from-state actions, the condition
+ * translation, the transition lookup, the state change and the transition
+ * actions. The to-state actions and the input advance follow the block.
+ */
+void JavaTabCodeGen::writeOutExec()
+{
+	out <<
+		" {\n"
+		" int _klen";
+
+	if ( anyRegCurStateRef() )
+		out << ", _ps";
+
+	out <<
+		";\n"
+		" int _trans;\n";
+
+	if ( anyConditions() )
+		out << " int _widec;\n";
+
+	if ( anyToStateActions() || anyRegActions() || anyFromStateActions() ) {
+		out <<
+			" int _acts;\n"
+			" int _nacts;\n";
+	}
+
+	out <<
+		" int _keys;\n"
+		"\n";
+
+	if ( cgd->hasEnd )
+		out << " if ( " << P() << " != " << PE() << " ) {\n";
+
+	out << " _resume: while ( true ) {\n";
+
+	out << " _again: do {\n";
+
+	if ( redFsm->errState != 0 ) {
+		out <<
+			" if ( " << CS() << " == " << redFsm->errState->id << " )\n"
+			" break _resume;\n";
+	}
+
+	if ( anyFromStateActions() ) {
+		out <<
+			" _acts = " << FSA() << "[" << CS() << "]" << ";\n"
+			" _nacts = " << CAST("int") << " " << A() << "[_acts++];\n"
+			" while ( _nacts-- > 0 ) {\n"
+			" switch ( " << A() << "[_acts++] ) {\n";
+			FROM_STATE_ACTION_SWITCH();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			" }\n"
+			"\n";
+	}
+
+	if ( anyConditions() )
+		COND_TRANSLATE();
+
+	LOCATE_TRANS();
+
+	if ( anyRegCurStateRef() )
+		out << " _ps = " << CS() << ";\n";
+
+	if ( useIndicies )
+		out << " _trans = " << I() << "[_trans];\n";
+
+	out <<
+		" " << CS() << " = " << TT() << "[_trans];\n"
+		"\n";
+
+	if ( anyRegActions() ) {
+		out <<
+			" if ( " << TA() << "[_trans] == 0 )\n"
+			" break _again;\n"
+			"\n"
+			" _acts = " << TA() << "[_trans]" << ";\n"
+			" _nacts = " << CAST("int") << " " << A() << "[_acts++];\n"
+			" while ( _nacts-- > 0 )\n {\n"
+			" switch ( " << A() << "[_acts++] )\n"
+			" {\n";
+			ACTION_SWITCH();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			" }\n"
+			"\n";
+	}
+
+	/* Again loop, functions as again 
label. */
+	out << " } while (false);\n";
+
+	if ( anyToStateActions() ) {
+		out <<
+			" _acts = " << TSA() << "[" << CS() << "]" << ";\n"
+			" _nacts = " << CAST("int") << " " << A() << "[_acts++];\n"
+			" while ( _nacts-- > 0 ) {\n"
+			" switch ( " << A() << "[_acts++] ) {\n";
+			TO_STATE_ACTION_SWITCH();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			" }\n"
+			"\n";
+	}
+
+	if ( cgd->hasEnd ) {
+		out <<
+			" if ( ++" << P() << " == " << PE() << " )\n"
+			" break _resume;\n";
+	}
+	else {
+		out <<
+			" " << P() << " += 1;\n";
+	}
+
+	/* Close the resume loop. */
+	out << " }\n";
+
+	/* The if guarding on empty string. It was only opened when the machine
+	 * has an end pointer, so it is only closed in that case. */
+	if ( cgd->hasEnd )
+		out << " }\n";
+
+	/* The execute block. */
+	out << " }\n";
+}
+/*
+ * Write the Java EOF code: look up the current state's eof action list and
+ * run each action through a switch whose cases come from
+ * EOF_ACTION_SWITCH(). Nothing is written when the machine has no eof
+ * actions. Unlike the execute block, the generated code is not wrapped in
+ * braces and declares _acts and _nacts itself.
+ */
+void JavaTabCodeGen::writeOutEOF()
+{
+	if ( anyEofActions() ) {
+		out <<
+			" int _acts = " << EA() << "[" << CS() << "]" << ";\n"
+			" int _nacts = " << CAST("int") << " " << A() << "[_acts++];\n"
+			" while ( _nacts-- > 0 ) {\n"
+			" switch ( " << A() << "[_acts++] ) {\n";
+			EOF_ACTION_SWITCH();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			" }\n"
+			"\n";
+	}
+}
+
diff --git a/rlcodegen/javacodegen.h b/rlcodegen/javacodegen.h
new file mode 100644
index 0000000..878f647
--- /dev/null
+++ b/rlcodegen/javacodegen.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _JAVACODEGEN_H
+#define _JAVACODEGEN_H
+
+#include "tabcodegen.h"
+
+/*
+ * JavaTabCodeGen
+ *
+ * Table-driven generator for Java: TabCodeGen combined with JavaCodeGen.
+ * The members declared here are defined in javacodegen.cpp, where the
+ * control-flow statements are written as labelled breaks.
+ */
+struct JavaTabCodeGen
+	: public TabCodeGen, public JavaCodeGen
+{
+	/* Code for the control-flow statements that appear inside actions,
+	 * written to `ret`. */
+	void BREAK( ostream &ret, int targState );
+	void GOTO( ostream &ret, int gotoDest, bool inFinish );
+	void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+	void CALL( ostream &ret, int callDest, int targState, bool inFinish );
+	void CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish );
+	void RET( ostream &ret, bool inFinish );
+
+	/* Pieces of the execute block, and the two top-level writers. */
+	void COND_TRANSLATE();
+	void LOCATE_TRANS();
+	virtual void writeOutExec();
+	virtual void writeOutEOF();
+};
+
+
+#endif
diff --git a/rlcodegen/main.cpp b/rlcodegen/main.cpp
new file mode 100644
index 0000000..f9a0598
--- /dev/null
+++ b/rlcodegen/main.cpp
@@ -0,0 +1,441 @@
+/*
+ * Copyright 2001-2005 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <iostream>
+#include <fstream>
+#include <unistd.h>
+
+#include "rlcodegen.h"
+#include "rlcodegen.h"
+#include "xmlparse.h"
+#include "pcheck.h"
+#include "vector.h"
+#include "version.h"
+
+#include "common.cpp"
+
+using std::istream;
+using std::ifstream;
+using std::ostream;
+using std::ios;
+using std::cin;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+/* Target language and output style. */
+OutputFormat outputFormat = OutCode;
+CodeStyleEnum codeStyle = GenTables;
+
+/* Io globals. outputFileName stays null until it is given with -o or
+ * derived from the input name in openOutput(). */
+istream *inStream = 0;
+ostream *outStream = 0;
+output_filter *outFilter = 0;
+char *outputFileName = 0;
+
+/* Graphviz dot file generation. */
+bool graphvizDone = false;
+
+char *gblFileName = "<unknown>"; /* prefix of the error and warning messages */
+
+int numSplitPartitions = 0;
+
+bool printPrintables = false; /* read by GraphvizDotGen::KEY */
+
+/* Print a summary of the options to standard output. */
+void usage()
+{
+	cout <<
+"usage: rlcodegen [options] file\n"
+"general:\n"
+" -h, -H, -?, --help Print this usage and exit\n"
+" -v, --version Print version information and exit\n"
+" -o <file> Write output to <file>\n"
+"output:\n"
+" -V Generate a Graphviz dotfile instead of code\n"
+" -p Print printable characters in Graphviz output\n"
+"generated code style:\n"
+" -T0 Table driven FSM (default)\n"
+" -T1 Faster table driven FSM\n"
+" -F0 Flat table driven FSM\n"
+" -F1 Faster flat table-driven FSM\n"
+" -G0 Goto-driven FSM\n"
+" -G1 Faster goto-driven FSM\n"
+" -G2 Really fast goto-driven FSM\n"
+" -P<N> N-Way Split really fast goto-driven FSM\n"
+	;
+}
+
+/* Print version information. 
*/ +void version() +{ + cout << "Ragel Code Generator version " VERSION << " " PUBDATE << endl << + "Copyright (c) 2001-2006 by Adrian Thurston" << endl; +} + +/* Scans a string looking for the file extension. If there is a file + * extension then pointer returned points to inside the string + * passed in. Otherwise returns null. */ +char *findFileExtension( char *stemFile ) +{ + char *ppos = stemFile + strlen(stemFile) - 1; + + /* Scan backwards from the end looking for the first dot. + * If we encounter a '/' before the first dot, then stop the scan. */ + while ( 1 ) { + /* If we found a dot or got to the beginning of the string then + * we are done. */ + if ( ppos == stemFile || *ppos == '.' ) + break; + + /* If we hit a / then there is no extension. Done. */ + if ( *ppos == '/' ) { + ppos = stemFile; + break; + } + ppos--; + } + + /* If we got to the front of the string then bail we + * did not find an extension */ + if ( ppos == stemFile ) + ppos = 0; + + return ppos; +} + +/* Make a file name from a stem. Removes the old filename suffix and + * replaces it with a new one. Returns a newed up string. */ +char *fileNameFromStem( char *stemFile, char *suffix ) +{ + int len = strlen( stemFile ); + assert( len > 0 ); + + /* Get the extension. */ + char *ppos = findFileExtension( stemFile ); + + /* If an extension was found, then shorten what we think the len is. */ + if ( ppos != 0 ) + len = ppos - stemFile; + + /* Make the return string from the stem and the suffix. */ + char *retVal = new char[ len + strlen( suffix ) + 1 ]; + strncpy( retVal, stemFile, len ); + strcpy( retVal + len, suffix ); + + return retVal; +} + +/* Total error count. */ +int gblErrorCount = 0; + +/* Print the opening to a program error, then return the error stream. */ +ostream &error() +{ + gblErrorCount += 1; + cerr << PROGNAME ": "; + return cerr; +} + +/* Print the opening to an error in the input, then return the error ostream. 
*/ +//ostream &error( const YYLTYPE &loc ) +//{ +// gblErrorCount += 1; +// cerr << gblFileName << ":" << loc.first_line << ":" << loc.first_column << ": "; +// return cerr; +//} + +/* Print the opening to an error in the input, then return the error ostream. */ +//ostream &error( const InputLoc &loc ) +//{ +// gblErrorCount += 1; +// cerr << gblFileName << ":" << loc.line << ":" << loc.col << ": "; +// return cerr; +//} + +ostream &error( int first_line, int first_column ) +{ + gblErrorCount += 1; + cerr << gblFileName << ":" << ":" << first_line << ":" << first_column << ": "; + return cerr; +} + +ostream &warning( ) +{ + cerr << gblFileName << ":" << ": warning: "; + return cerr; +} + +ostream &warning( const InputLoc &loc ) +{ + cerr << gblFileName << loc.line << ":" << loc.col << ": warning: "; + return cerr; +} + +std::ostream &warning( int first_line, int first_column ) +{ + cerr << gblFileName << ":" << first_line << ":" << + first_column << ": warning: "; + return cerr; +} + +//ostream &xml_error( const YYLTYPE &loc ) +//{ +// gblErrorCount += 1; +// cerr << "<xml-input>:" << loc.first_line << ":" << loc.first_column << ": "; +// return cerr; +//} + +ostream &xml_error( const InputLoc &loc ) +{ + gblErrorCount += 1; + cerr << "<xml-input>:" << loc.line << ":" << loc.col << ": "; + return cerr; +} + +/* Counts newlines before sending sync. */ +int output_filter::sync( ) +{ + line += 1; + return std::filebuf::sync(); +} + +/* Counts newlines before sending data out to file. */ +std::streamsize output_filter::xsputn( const char *s, std::streamsize n ) +{ + for ( int i = 0; i < n; i++ ) { + if ( s[i] == '\n' ) + line += 1; + } + return std::filebuf::xsputn( s, n ); +} + +void escapeLineDirectivePath( std::ostream &out, char *path ) +{ + for ( char *pc = path; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + out << "\\\\"; + else + out << *pc; + } +} + +/* Invoked by the parser, after the source file + * name is taken from XML file. 
*/ +void openOutput( char *inputFile ) +{ + /* If the output format is code and no output file name is given, then + * make a default. */ + if ( outputFormat == OutCode && outputFileName == 0 ) { + char *ext = findFileExtension( inputFile ); + if ( ext != 0 && strcmp( ext, ".rh" ) == 0 ) + outputFileName = fileNameFromStem( inputFile, ".h" ); + else { + char *defExtension = 0; + switch ( hostLangType ) { + case CCode: defExtension = ".c"; break; + case DCode: defExtension = ".d"; break; + case JavaCode: defExtension = ".java"; break; + } + outputFileName = fileNameFromStem( inputFile, defExtension ); + } + } + + /* Make sure we are not writing to the same file as the input file. */ + if ( outputFileName != 0 && strcmp( inputFile, outputFileName ) == 0 ) { + error() << "output file \"" << outputFileName << + "\" is the same as the input file" << endl; + } + + if ( outputFileName != 0 ) { + /* Create the filter on the output and open it. */ + outFilter = new output_filter; + outFilter->open( outputFileName, ios::out|ios::trunc ); + if ( !outFilter->is_open() ) { + error() << "error opening " << outputFileName << " for writing" << endl; + exit(1); + } + + /* Open the output stream, attaching it to the filter. */ + outStream = new ostream( outFilter ); + } + else { + /* Writing out ot std out. */ + outStream = &cout; + } +} + +/* Main, process args and call yyparse to start scanning input. */ +int main(int argc, char **argv) +{ + ParamCheck pc("o:VpT:F:G:vHh?-:P:", argc, argv); + char *xmlInputFileName = 0; + + while ( pc.check() ) { + switch ( pc.state ) { + case ParamCheck::match: + switch ( pc.parameter ) { + /* Output. */ + case 'o': + if ( *pc.parameterArg == 0 ) + error() << "a zero length output file name was given" << endl; + else if ( outputFileName != 0 ) + error() << "more than one output file name was given" << endl; + else { + /* Ok, remember the output file name. */ + outputFileName = pc.parameterArg; + } + break; + + /* Output formats. 
*/ + case 'V': + outputFormat = OutGraphvizDot; + break; + + case 'p': + printPrintables = true; + break; + + /* Code style. */ + case 'T': + if ( pc.parameterArg[0] == '0' ) + codeStyle = GenTables; + else if ( pc.parameterArg[0] == '1' ) + codeStyle = GenFTables; + else { + error() << "-T" << pc.parameterArg[0] << + " is an invalid argument" << endl; + exit(1); + } + break; + case 'F': + if ( pc.parameterArg[0] == '0' ) + codeStyle = GenFlat; + else if ( pc.parameterArg[0] == '1' ) + codeStyle = GenFFlat; + else { + error() << "-F" << pc.parameterArg[0] << + " is an invalid argument" << endl; + exit(1); + } + break; + case 'G': + if ( pc.parameterArg[0] == '0' ) + codeStyle = GenGoto; + else if ( pc.parameterArg[0] == '1' ) + codeStyle = GenFGoto; + else if ( pc.parameterArg[0] == '2' ) + codeStyle = GenIpGoto; + else { + error() << "-G" << pc.parameterArg[0] << + " is an invalid argument" << endl; + exit(1); + } + break; + case 'P': + codeStyle = GenSplit; + numSplitPartitions = atoi( pc.parameterArg ); + break; + + /* Version and help. */ + case 'v': + version(); + exit(0); + case 'H': case 'h': case '?': + usage(); + exit(0); + case '-': + if ( strcasecmp(pc.parameterArg, "help") == 0 ) { + usage(); + exit(0); + } + else if ( strcasecmp(pc.parameterArg, "version") == 0 ) { + version(); + exit(0); + } + else { + error() << "--" << pc.parameterArg << + " is an invalid argument" << endl; + break; + } + } + break; + + case ParamCheck::invalid: + error() << "-" << pc.parameter << " is an invalid argument" << endl; + break; + + case ParamCheck::noparam: + if ( *pc.curArg == 0 ) + error() << "a zero length input file name was given" << endl; + else if ( xmlInputFileName != 0 ) + error() << "more than one input file name was given" << endl; + else { + /* OK, Remember the filename. */ + xmlInputFileName = pc.curArg; + } + break; + } + } + + /* Bail on above errors. */ + if ( gblErrorCount > 0 ) + exit(1); + + /* Open the input file for reading. 
*/ + if ( xmlInputFileName != 0 ) { + /* Open the input file for reading. */ + ifstream *inFile = new ifstream( xmlInputFileName ); + inStream = inFile; + if ( ! inFile->is_open() ) + error() << "could not open " << xmlInputFileName << " for reading" << endl; + } + else { + xmlInputFileName = "<stdin>"; + inStream = &cin; + } + + /* Bail on above errors. */ + if ( gblErrorCount > 0 ) + exit(1); + + /* Parse the input! */ + xml_parse( *inStream, xmlInputFileName ); + + /* If writing to a file, delete the ostream, causing it to flush. + * Standard out is flushed automatically. */ + if ( outputFileName != 0 ) { + delete outStream; + delete outFilter; + } + + /* Finished, final check for errors.. */ + if ( gblErrorCount > 0 ) { + /* If we opened an output file, remove it. */ + if ( outputFileName != 0 ) + unlink( outputFileName ); + exit(1); + } + return 0; +} diff --git a/rlcodegen/redfsm.cpp b/rlcodegen/redfsm.cpp new file mode 100644 index 0000000..ffcc207 --- /dev/null +++ b/rlcodegen/redfsm.cpp @@ -0,0 +1,535 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "redfsm.h" +#include "avlmap.h" +#include <iostream> +#include <sstream> + +using std::ostringstream; + +KeyOps *keyOps = 0; + +string Action::nameOrLoc() +{ + if ( name != 0 ) + return string(name); + else { + ostringstream ret; + ret << loc.line << ":" << loc.col; + return ret.str(); + } +} + +RedFsmAp::RedFsmAp() +: + wantComplete(false), + forcedErrorState(false), + nextActionId(0), + nextTransId(0), + errState(0), + errTrans(0), + firstFinState(0), + numFinStates(0) +{ +} + +void RedFsmAp::depthFirstOrdering( RedStateAp *state ) +{ + /* Nothing to do if the state is already on the list. */ + if ( state->onStateList ) + return; + + /* Doing depth first, put state on the list. */ + state->onStateList = true; + stateList.append( state ); + + /* At this point transitions should only be in ranges. */ + assert( state->outSingle.length() == 0 ); + assert( state->defTrans == 0 ); + + /* Recurse on everything ranges. */ + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + if ( rtel->value->targ != 0 ) + depthFirstOrdering( rtel->value->targ ); + } +} + +/* Ordering states by transition connections. */ +void RedFsmAp::depthFirstOrdering() +{ + /* Init on state list flags. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + st->onStateList = false; + + /* Clear out the state list, we will rebuild it. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + /* Add back to the state list from the start state and all other entry + * points. */ + depthFirstOrdering( startState ); + for ( RedStateSet::Iter en = entryPoints; en.lte(); en++ ) + depthFirstOrdering( *en ); + if ( forcedErrorState ) + depthFirstOrdering( errState ); + + /* Make sure we put everything back on. 
*/ + assert( stateListLen == stateList.length() ); +} + +/* Assign state ids by appearance in the state list. */ +void RedFsmAp::sequentialStateIds() +{ + /* Table based machines depend on the state numbers starting at zero. */ + nextStateId = 0; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + st->id = nextStateId++; +} + +/* Stable sort the states by final state status. */ +void RedFsmAp::sortStatesByFinal() +{ + /* Move forward through the list and throw final states onto the end. */ + RedStateAp *state = 0; + RedStateAp *next = stateList.head; + RedStateAp *last = stateList.tail; + while ( state != last ) { + /* Move forward and load up the next. */ + state = next; + next = state->next; + + /* Throw to the end? */ + if ( state->isFinal ) { + stateList.detach( state ); + stateList.append( state ); + } + } +} + +/* Assign state ids by final state state status. */ +void RedFsmAp::sortStateIdsByFinal() +{ + /* Table based machines depend on this starting at zero. */ + nextStateId = 0; + + /* First pass to assign non final ids. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( ! st->isFinal ) + st->id = nextStateId++; + } + + /* Second pass to assign final ids. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->isFinal ) + st->id = nextStateId++; + } +} + +/* Find the final state with the lowest id. */ +void RedFsmAp::findFirstFinState() +{ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->isFinal && (firstFinState == 0 || st->id < firstFinState->id) ) + firstFinState = st; + } +} + +void RedFsmAp::assignActionLocs() +{ + int nextLocation = 0; + for ( ActionTableMap::Iter act = actionMap; act.lte(); act++ ) { + /* Store the loc, skip over the array and a null terminator. */ + act->location = nextLocation; + nextLocation += act->key.length() + 1; + } +} + +/* Check if we can extend the current range by displacing any ranges + * ahead to the singles. 
*/ +bool RedFsmAp::canExtend( const RedTransList &list, int pos ) +{ + /* Get the transition that we want to extend. */ + RedTransAp *extendTrans = list[pos].value; + + /* Look ahead in the transition list. */ + for ( int next = pos + 1; next < list.length(); pos++, next++ ) { + /* If they are not continuous then cannot extend. */ + Key nextKey = list[next].lowKey; + nextKey.decrement(); + if ( list[pos].highKey != nextKey ) + break; + + /* Check for the extenstion property. */ + if ( extendTrans == list[next].value ) + return true; + + /* If the span of the next element is more than one, then don't keep + * checking, it won't be moved to single. */ + unsigned long long nextSpan = keyOps->span( list[next].lowKey, list[next].highKey ); + if ( nextSpan > 1 ) + break; + } + return false; +} + +/* Move ranges to the singles list. */ +void RedFsmAp::moveTransToSingle( RedStateAp *state ) +{ + RedTransList &range = state->outRange; + RedTransList &single = state->outSingle; + for ( int rpos = 0; rpos < range.length(); ) { + /* Check if this is a range we can extend. */ + if ( canExtend( range, rpos ) ) { + /* Transfer singles over. */ + while ( range[rpos].value != range[rpos+1].value ) { + /* Transfer the range to single. */ + single.append( range[rpos+1] ); + range.remove( rpos+1 ); + } + + /* Extend. */ + range[rpos].highKey = range[rpos+1].highKey; + range.remove( rpos+1 ); + } + /* Maybe move it to the singles. */ + else if ( keyOps->span( range[rpos].lowKey, range[rpos].highKey ) == 1 ) { + single.append( range[rpos] ); + range.remove( rpos ); + } + else { + /* Keeping it in the ranges. */ + rpos += 1; + } + } +} + +/* Look through ranges and choose suitable single character transitions. */ +void RedFsmAp::chooseSingle() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Rewrite the transition list taking out the suitable single + * transtions. 
*/ + moveTransToSingle( st ); + } +} + +void RedFsmAp::makeFlat() +{ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->stateCondList.length() == 0 ) { + st->condLowKey = 0; + st->condHighKey = 0; + } + else { + st->condLowKey = st->stateCondList.head->lowKey; + st->condHighKey = st->stateCondList.tail->highKey; + + unsigned long long span = keyOps->span( st->condLowKey, st->condHighKey ); + st->condList = new CondSpace*[ span ]; + memset( st->condList, 0, sizeof(CondSpace*)*span ); + + for ( StateCondList::Iter sci = st->stateCondList; sci.lte(); sci++ ) { + unsigned long long base, trSpan; + base = keyOps->span( st->condLowKey, sci->lowKey )-1; + trSpan = keyOps->span( sci->lowKey, sci->highKey ); + for ( unsigned long long pos = 0; pos < trSpan; pos++ ) + st->condList[base+pos] = sci->condSpace; + } + } + + if ( st->outRange.length() == 0 ) { + st->lowKey = st->highKey = 0; + st->transList = 0; + } + else { + st->lowKey = st->outRange[0].lowKey; + st->highKey = st->outRange[st->outRange.length()-1].highKey; + unsigned long long span = keyOps->span( st->lowKey, st->highKey ); + st->transList = new RedTransAp*[ span ]; + memset( st->transList, 0, sizeof(RedTransAp*)*span ); + + for ( RedTransList::Iter trans = st->outRange; trans.lte(); trans++ ) { + unsigned long long base, trSpan; + base = keyOps->span( st->lowKey, trans->lowKey )-1; + trSpan = keyOps->span( trans->lowKey, trans->highKey ); + for ( unsigned long long pos = 0; pos < trSpan; pos++ ) + st->transList[base+pos] = trans->value; + } + + /* Fill in the gaps with the default transition. */ + for ( unsigned long long pos = 0; pos < span; pos++ ) { + if ( st->transList[pos] == 0 ) + st->transList[pos] = st->defTrans; + } + } + } +} + + +/* A default transition has been picked, move it from the outRange to the + * default pointer. 
*/ +void RedFsmAp::moveToDefault( RedTransAp *defTrans, RedStateAp *state ) +{ + /* Rewrite the outRange, omitting any ranges that use + * the picked default. */ + RedTransList outRange; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + /* If it does not take the default, copy it over. */ + if ( rtel->value != defTrans ) + outRange.append( *rtel ); + } + + /* Save off the range we just created into the state's range. */ + state->outRange.shallowCopy( outRange ); + outRange.abandon(); + + /* Store the default. */ + state->defTrans = defTrans; +} + +bool RedFsmAp::alphabetCovered( RedTransList &outRange ) +{ + /* Cannot cover without any out ranges. */ + if ( outRange.length() == 0 ) + return false; + + /* If the first range doesn't start at the the lower bound then the + * alphabet is not covered. */ + RedTransList::Iter rtel = outRange; + if ( keyOps->minKey < rtel->lowKey ) + return false; + + /* Check that every range is next to the previous one. */ + rtel.increment(); + for ( ; rtel.lte(); rtel++ ) { + Key highKey = rtel[-1].highKey; + highKey.increment(); + if ( highKey != rtel->lowKey ) + return false; + } + + /* The last must extend to the upper bound. */ + RedTransEl *last = &outRange[outRange.length()-1]; + if ( last->highKey < keyOps->maxKey ) + return false; + + return true; +} + +RedTransAp *RedFsmAp::chooseDefaultSpan( RedStateAp *state ) +{ + /* Make a set of transitions from the outRange. */ + RedTransSet stateTransSet; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) + stateTransSet.insert( rtel->value ); + + /* For each transition in the find how many alphabet characters the + * transition spans. */ + unsigned long long *span = new unsigned long long[stateTransSet.length()]; + memset( span, 0, sizeof(unsigned long long) * stateTransSet.length() ); + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + /* Lookup the transition in the set. 
*/ + RedTransAp **inSet = stateTransSet.find( rtel->value ); + int pos = inSet - stateTransSet.data; + span[pos] += keyOps->span( rtel->lowKey, rtel->highKey ); + } + + /* Find the max span, choose it for making the default. */ + RedTransAp *maxTrans = 0; + unsigned long long maxSpan = 0; + for ( RedTransSet::Iter rtel = stateTransSet; rtel.lte(); rtel++ ) { + if ( span[rtel.pos()] > maxSpan ) { + maxSpan = span[rtel.pos()]; + maxTrans = *rtel; + } + } + + delete[] span; + return maxTrans; +} + +/* Pick default transitions from ranges for the states. */ +void RedFsmAp::chooseDefaultSpan() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Only pick a default transition if the alphabet is covered. This + * avoids any transitions in the out range that go to error and avoids + * the need for an ERR state. */ + if ( alphabetCovered( st->outRange ) ) { + /* Pick a default transition by largest span. */ + RedTransAp *defTrans = chooseDefaultSpan( st ); + + /* Rewrite the transition list taking out the transition we picked + * as the default and store the default. */ + moveToDefault( defTrans, st ); + } + } +} + +RedTransAp *RedFsmAp::chooseDefaultGoto( RedStateAp *state ) +{ + /* Make a set of transitions from the outRange. */ + RedTransSet stateTransSet; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + if ( rtel->value->targ == state->next ) + return rtel->value; + } + return 0; +} + +void RedFsmAp::chooseDefaultGoto() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Pick a default transition. */ + RedTransAp *defTrans = chooseDefaultGoto( st ); + if ( defTrans == 0 ) + defTrans = chooseDefaultSpan( st ); + + /* Rewrite the transition list taking out the transition we picked + * as the default and store the default. 
*/ + moveToDefault( defTrans, st ); + } +} + +RedTransAp *RedFsmAp::chooseDefaultNumRanges( RedStateAp *state ) +{ + /* Make a set of transitions from the outRange. */ + RedTransSet stateTransSet; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) + stateTransSet.insert( rtel->value ); + + /* For each transition in the find how many ranges use the transition. */ + int *numRanges = new int[stateTransSet.length()]; + memset( numRanges, 0, sizeof(int) * stateTransSet.length() ); + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + /* Lookup the transition in the set. */ + RedTransAp **inSet = stateTransSet.find( rtel->value ); + numRanges[inSet - stateTransSet.data] += 1; + } + + /* Find the max number of ranges. */ + RedTransAp *maxTrans = 0; + int maxNumRanges = 0; + for ( RedTransSet::Iter rtel = stateTransSet; rtel.lte(); rtel++ ) { + if ( numRanges[rtel.pos()] > maxNumRanges ) { + maxNumRanges = numRanges[rtel.pos()]; + maxTrans = *rtel; + } + } + + delete[] numRanges; + return maxTrans; +} + +void RedFsmAp::chooseDefaultNumRanges() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Pick a default transition. */ + RedTransAp *defTrans = chooseDefaultNumRanges( st ); + + /* Rewrite the transition list taking out the transition we picked + * as the default and store the default. */ + moveToDefault( defTrans, st ); + } +} + +RedTransAp *RedFsmAp::getErrorTrans( ) +{ + /* If the error trans has not been made aready, make it. */ + if ( errTrans == 0 ) { + /* This insert should always succeed since no transition created by + * the user can point to the error state. */ + errTrans = new RedTransAp( getErrorState(), 0, nextTransId++ ); + RedTransAp *inRes = transSet.insert( errTrans ); + assert( inRes != 0 ); + } + return errTrans; +} + +RedStateAp *RedFsmAp::getErrorState() +{ + /* Check if we need to init the error trans. 
*/ + if ( errState == 0 ) { + errState = new RedStateAp(); + stateList.append( errState ); + } + return errState; +} + + +RedTransAp *RedFsmAp::allocateTrans( RedStateAp *targ, RedAction *action ) +{ + /* Create a reduced trans and look for it in the transiton set. */ + RedTransAp redTrans( targ, action, 0 ); + RedTransAp *inDict = transSet.find( &redTrans ); + if ( inDict == 0 ) { + inDict = new RedTransAp( targ, action, nextTransId++ ); + transSet.insert( inDict ); + } + return inDict; +} + +void RedFsmAp::partitionFsm( int nparts ) +{ + /* At this point the states are ordered by a depth-first traversal. We + * will allocate to partitions based on this ordering. */ + this->nParts = nparts; + int partSize = stateList.length() / nparts; + int remainder = stateList.length() % nparts; + int numInPart = partSize; + int partition = 0; + if ( remainder-- > 0 ) + numInPart += 1; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + st->partition = partition; + + numInPart -= 1; + if ( numInPart == 0 ) { + partition += 1; + numInPart = partSize; + if ( remainder-- > 0 ) + numInPart += 1; + } + } +} + +void RedFsmAp::setInTrans() +{ + /* First pass counts the number of transitions. */ + for ( TransApSet::Iter trans = transSet; trans.lte(); trans++ ) + trans->targ->numInTrans += 1; + + /* Pass over states to allocate the needed memory. Reset the counts so we + * can use them as the current size. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + st->inTrans = new RedTransAp*[st->numInTrans]; + st->numInTrans = 0; + } + + /* Second pass over transitions copies pointers into the in trans list. 
*/ + for ( TransApSet::Iter trans = transSet; trans.lte(); trans++ ) + trans->targ->inTrans[trans->targ->numInTrans++] = trans; +} diff --git a/rlcodegen/redfsm.h b/rlcodegen/redfsm.h new file mode 100644 index 0000000..42df42e --- /dev/null +++ b/rlcodegen/redfsm.h @@ -0,0 +1,474 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _REDFSM_H +#define _REDFSM_H + +#include <assert.h> +#include <string.h> +#include <string> +#include "common.h" +#include "vector.h" +#include "dlist.h" +#include "compare.h" +#include "bstmap.h" +#include "bstset.h" +#include "avlmap.h" +#include "avltree.h" +#include "avlbasic.h" +#include "mergesort.h" +#include "rlcodegen.h" +#include "sbstmap.h" +#include "sbstset.h" +#include "sbsttable.h" + +#define TRANS_ERR_TRANS 0 +#define STATE_ERR_STATE 0 +#define FUNC_NO_FUNC 0 + +using std::string; + +struct RedStateAp; +struct InlineList; +struct Action; + +/* + * Inline code tree + */ +struct InlineItem +{ + enum Type + { + Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret, + PChar, Char, Hold, Exec, HoldTE, ExecTE, Curs, Targs, Entry, + LmSwitch, LmSetActId, LmSetTokEnd, LmGetTokEnd, LmInitTokStart, + LmInitAct, LmSetTokStart, SubAction, Break + }; + + 
InlineItem( const InputLoc &loc, Type type ) : + loc(loc), data(0), targId(0), targState(0), + lmId(0), children(0), offset(0), + handlesError(false), type(type) { } + + InputLoc loc; + char *data; + int targId; + RedStateAp *targState; + int lmId; + InlineList *children; + int offset; + bool handlesError; + Type type; + + InlineItem *prev, *next; +}; + +/* Normally this would be atypedef, but that would entail including DList from + * ptreetypes, which should be just typedef forwards. */ +struct InlineList : public DList<InlineItem> { }; + +/* Element in list of actions. Contains the string for the code to exectute. */ +struct Action +: + public DListEl<Action> +{ + Action( ) + : + name(0), + inlineList(0), + actionId(0), + numTransRefs(0), + numToStateRefs(0), + numFromStateRefs(0), + numEofRefs(0) + { + } + + /* Data collected during parse. */ + InputLoc loc; + char *name; + InlineList *inlineList; + int actionId; + + string nameOrLoc(); + + /* Number of references in the final machine. */ + int numRefs() + { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; } + int numTransRefs; + int numToStateRefs; + int numFromStateRefs; + int numEofRefs; +}; + + +/* Forwards. */ +struct RedStateAp; +struct StateAp; + +/* Transistion Action Element. */ +typedef SBstMapEl< int, Action* > ActionTableEl; + +/* Transition Action Table. */ +struct ActionTable + : public SBstMap< int, Action*, CmpOrd<int> > +{ + void setAction( int ordering, Action *action ); + void setActions( int *orderings, Action **actions, int nActs ); + void setActions( const ActionTable &other ); +}; + +/* Compare of a whole action table element (key & value). 
*/ +struct CmpActionTableEl +{ + static int compare( const ActionTableEl &action1, + const ActionTableEl &action2 ) + { + if ( action1.key < action2.key ) + return -1; + else if ( action1.key > action2.key ) + return 1; + else if ( action1.value < action2.value ) + return -1; + else if ( action1.value > action2.value ) + return 1; + return 0; + } +}; + +/* Compare for ActionTable. */ +typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable; + +/* Set of states. */ +typedef BstSet<RedStateAp*> RedStateSet; +typedef BstSet<int> IntSet; + +/* Reduced action. */ +struct RedAction +: + public AvlTreeEl<RedAction> +{ + RedAction( ) + : + key(), + eofRefs(0), + numTransRefs(0), + numToStateRefs(0), + numFromStateRefs(0), + numEofRefs(0), + bAnyNextStmt(false), + bAnyCurStateRef(false), + bAnyBreakStmt(false) + { } + + const ActionTable &getKey() + { return key; } + + ActionTable key; + int actListId; + int location; + IntSet *eofRefs; + + /* Number of references in the final machine. */ + bool numRefs() + { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; } + int numTransRefs; + int numToStateRefs; + int numFromStateRefs; + int numEofRefs; + + bool anyNextStmt() { return bAnyNextStmt; } + bool anyCurStateRef() { return bAnyCurStateRef; } + bool anyBreakStmt() { return bAnyBreakStmt; } + + bool bAnyNextStmt; + bool bAnyCurStateRef; + bool bAnyBreakStmt; +}; +typedef AvlTree<RedAction, ActionTable, CmpActionTable> ActionTableMap; + +/* Reduced transition. */ +struct RedTransAp +: + public AvlTreeEl<RedTransAp> +{ + RedTransAp( RedStateAp *targ, RedAction *action, int id ) + : targ(targ), action(action), id(id), labelNeeded(true) { } + + RedStateAp *targ; + RedAction *action; + int id; + bool partitionBoundary; + bool labelNeeded; +}; + +/* Compare of transitions for the final reduction of transitions. Comparison + * is on target and the pointer to the shared action table. 
It is assumed that + * when this is used the action tables have been reduced. */ +struct CmpRedTransAp +{ + static int compare( const RedTransAp &t1, const RedTransAp &t2 ) + { + if ( t1.targ < t2.targ ) + return -1; + else if ( t1.targ > t2.targ ) + return 1; + else if ( t1.action < t2.action ) + return -1; + else if ( t1.action > t2.action ) + return 1; + else + return 0; + } +}; + +typedef AvlBasic<RedTransAp, CmpRedTransAp> TransApSet; + +/* Element in out range. */ +struct RedTransEl +{ + /* Constructors. */ + RedTransEl( Key lowKey, Key highKey, RedTransAp *value ) + : lowKey(lowKey), highKey(highKey), value(value) { } + + Key lowKey, highKey; + RedTransAp *value; +}; + +typedef Vector<RedTransEl> RedTransList; +typedef Vector<RedStateAp*> RedStateVect; + +typedef BstMapEl<RedStateAp*, unsigned long long> RedSpanMapEl; +typedef BstMap<RedStateAp*, unsigned long long> RedSpanMap; + +/* Compare used by span map sort. Reverse sorts by the span. */ +struct CmpRedSpanMapEl +{ + static int compare( const RedSpanMapEl &smel1, const RedSpanMapEl &smel2 ) + { + if ( smel1.value > smel2.value ) + return -1; + else if ( smel1.value < smel2.value ) + return 1; + else + return 0; + } +}; + +/* Sorting state-span map entries by span. */ +typedef MergeSort<RedSpanMapEl, CmpRedSpanMapEl> RedSpanMapSort; + +/* Set of entry ids that go into this state. 
*/ +typedef Vector<int> EntryIdVect; +typedef Vector<char*> EntryNameVect; + +typedef Vector< Action* > CondSet; + +struct Condition +{ + Condition( ) + : key(0), baseKey(0) {} + + Key key; + Key baseKey; + CondSet condSet; + + Condition *next, *prev; +}; +typedef DList<Condition> ConditionList; + +struct CondSpace +{ + Key baseKey; + CondSet condSet; + int condSpaceId; + + CondSpace *next, *prev; +}; +typedef DList<CondSpace> CondSpaceList; + +struct StateCond +{ + Key lowKey; + Key highKey; + + CondSpace *condSpace; + + StateCond *prev, *next; +}; +typedef DList<StateCond> StateCondList; +typedef Vector<StateCond*> StateCondVect; + +/* Reduced state. */ +struct RedStateAp +{ + RedStateAp() + : + defTrans(0), + condList(0), + transList(0), + isFinal(false), + labelNeeded(false), + outNeeded(false), + onStateList(false), + toStateAction(0), + fromStateAction(0), + eofAction(0), + id(0), + bAnyRegCurStateRef(false), + partitionBoundary(false), + inTrans(0), + numInTrans(0) + { } + + /* Transitions out. */ + RedTransList outSingle; + RedTransList outRange; + RedTransAp *defTrans; + + /* For flat conditions. */ + Key condLowKey, condHighKey; + CondSpace **condList; + + /* For flat keys. */ + Key lowKey, highKey; + RedTransAp **transList; + + /* The list of states that transitions from this state go to. */ + RedStateVect targStates; + + bool isFinal; + bool labelNeeded; + bool outNeeded; + bool onStateList; + RedAction *toStateAction; + RedAction *fromStateAction; + RedAction *eofAction; + int id; + StateCondList stateCondList; + StateCondVect stateCondVect; + + /* Pointers for the list of states. */ + RedStateAp *prev, *next; + + bool anyRegCurStateRef() { return bAnyRegCurStateRef; } + bool bAnyRegCurStateRef; + + int partition; + bool partitionBoundary; + + RedTransAp **inTrans; + int numInTrans; +}; + +/* List of states. */ +typedef DList<RedStateAp> RedStateList; + +/* Set of reduced transitons. Comparison is by pointer. 
*/ +typedef BstSet< RedTransAp*, CmpOrd<RedTransAp*> > RedTransSet; + +/* Next version of the fsm machine. */ +struct RedFsmAp +{ + RedFsmAp(); + + bool wantComplete; + bool forcedErrorState; + + int nextActionId; + int nextTransId; + + /* Next State Id doubles as the total number of state ids. */ + int nextStateId; + + TransApSet transSet; + ActionTableMap actionMap; + RedStateList stateList; + RedStateSet entryPoints; + RedStateAp *startState; + RedStateAp *errState; + RedTransAp *errTrans; + RedTransAp *errActionTrans; + RedStateAp *firstFinState; + int numFinStates; + int nParts; + + /* Is is it possible to extend a range by bumping ranges that span only + * one character to the singles array. */ + bool canExtend( const RedTransList &list, int pos ); + + /* Pick single transitions from the ranges. */ + void moveTransToSingle( RedStateAp *state ); + void chooseSingle(); + + void makeFlat(); + + /* Move a selected transition from ranges to default. */ + void moveToDefault( RedTransAp *defTrans, RedStateAp *state ); + + /* Pick a default transition by largest span. */ + RedTransAp *chooseDefaultSpan( RedStateAp *state ); + void chooseDefaultSpan(); + + /* Pick a default transition by most number of ranges. */ + RedTransAp *chooseDefaultNumRanges( RedStateAp *state ); + void chooseDefaultNumRanges(); + + /* Pick a default transition tailored towards goto driven machine. */ + RedTransAp *chooseDefaultGoto( RedStateAp *state ); + void chooseDefaultGoto(); + + /* Ordering states by transition connections. */ + void optimizeStateOrdering( RedStateAp *state ); + void optimizeStateOrdering(); + + /* Ordering states by transition connections. */ + void depthFirstOrdering( RedStateAp *state ); + void depthFirstOrdering(); + + /* Set state ids. */ + void sequentialStateIds(); + void sortStateIdsByFinal(); + + /* Arrange states in by final id. This is a stable sort. */ + void sortStatesByFinal(); + + /* Locating the first final state. 
This is the final state with the lowest + * id. */ + void findFirstFinState(); + + void assignActionLocs(); + + RedTransAp *getErrorTrans(); + RedStateAp *getErrorState(); + + /* Is every char in the alphabet covered? */ + bool alphabetCovered( RedTransList &outRange ); + + RedTransAp *allocateTrans( RedStateAp *targState, RedAction *actionTable ); + + void partitionFsm( int nParts ); + + void setInTrans(); +}; + + +#endif /* _REDFSM_H */ diff --git a/rlcodegen/rlcodegen.h b/rlcodegen/rlcodegen.h new file mode 100644 index 0000000..cc302ba --- /dev/null +++ b/rlcodegen/rlcodegen.h @@ -0,0 +1,162 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _RLCODEGEN_H +#define _RLCODEGEN_H + +#include <stdio.h> +#include <iostream> +#include <fstream> +#include "avltree.h" +#include "vector.h" +#include "config.h" + +#define PROGNAME "rlcodegen" + +/* Target language. */ +enum OutputFormat +{ + OutCode, + OutGraphvizDot +}; + +/* Target output style. */ +enum CodeStyleEnum +{ + GenTables, + GenFTables, + GenFlat, + GenFFlat, + GenGoto, + GenFGoto, + GenIpGoto, + GenSplit +}; + +/* Filter on the output stream that keeps track of the number of lines + * output. 
*/ +class output_filter : public std::filebuf +{ +public: + output_filter() : line(1) { } + + virtual int sync(); + virtual std::streamsize xsputn(const char* s, std::streamsize n); + + int line; +}; + +extern OutputFormat outputFormat; +extern CodeStyleEnum codeStyle; + +/* IO filenames and stream. */ +extern char *outputFileName; +extern std::ostream *outStream; +extern output_filter *outFilter; + +extern bool printPrintables; +extern bool graphvizDone; + +int xml_parse( std::istream &input, char *fileName ); + +extern int gblErrorCount; +extern char machineMain[]; + +extern int numSplitPartitions; + +/* + * Error reporting. + */ + +/* Location in an input file. */ +struct InputLoc +{ + int line; + int col; +}; + +struct AttrMarker +{ + char *id; + int idLen; + char *value; + int valueLen; +}; + +struct Attribute +{ + char *id; + char *value; +}; + +typedef Vector<AttrMarker> AttrMkList; +typedef Vector<Attribute> AttrList; +struct XMLTagHashPair; + +struct XMLTag +{ + enum TagType { Open, Close }; + + XMLTag( XMLTagHashPair *tagId, TagType type ) : + tagId(tagId), type(type), + content(0), attrList(0) {} + + Attribute *findAttr( char *id ) + { + if ( attrList != 0 ) { + for ( AttrList::Iter attr = *attrList; attr.lte(); attr++ ) { + if ( strcmp( id, attr->id ) == 0 ) + return attr; + } + } + return 0; + } + + XMLTagHashPair *tagId; + TagType type; + + /* Content is associtated with closing tags. */ + char *content; + + /* Attribute lists are associated with opening tags. 
*/ + AttrList *attrList; +}; + + +std::ostream &error(); +//std::ostream &error( const YYLTYPE &loc ); +std::ostream &error( const InputLoc &loc ); +std::ostream &error( int first_line, int first_column ); +std::ostream &warning( ); +std::ostream &warning( const InputLoc &loc ); +std::ostream &warning( int first_line, int first_column ); +std::ostream &xml_error( const InputLoc &loc ); +//std::ostream &xml_error( const YYLTYPE &loc ); + + + +void openOutput( char *inputFile ); +char *fileNameFromStem( char *stemFile, char *suffix ); + +/* Size of the include stack. */ +#define INCLUDE_STACK_SIZE 32 + +#endif /* _RLCODEGEN_H */ diff --git a/rlcodegen/splitcodegen.cpp b/rlcodegen/splitcodegen.cpp new file mode 100644 index 0000000..48519ba --- /dev/null +++ b/rlcodegen/splitcodegen.cpp @@ -0,0 +1,518 @@ +/* + * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#include "rlcodegen.h" +#include "splitcodegen.h" +#include "gendata.h" +#include <assert.h> + +using std::ostream; +using std::ios; +using std::endl; + +/* Emit the goto to take for a given transition. 
*/ +std::ostream &SplitCodeGen::TRANS_GOTO( RedTransAp *trans, int level ) +{ + if ( trans->targ->partition == currentPartition ) { + if ( trans->action != 0 ) { + /* Go to the transition which will go to the state. */ + out << TABS(level) << "goto tr" << trans->id << ";"; + } + else { + /* Go directly to the target state. */ + out << TABS(level) << "goto st" << trans->targ->id << ";"; + } + } + else { + if ( trans->action != 0 ) { + /* Go to the transition which will go to the state. */ + out << TABS(level) << "goto ptr" << trans->id << ";"; + trans->partitionBoundary = true; + } + else { + /* Go directly to the target state. */ + out << TABS(level) << "goto pst" << trans->targ->id << ";"; + trans->targ->partitionBoundary = true; + } + } + return out; +} + +/* Called from before writing the gotos for each state. */ +void SplitCodeGen::GOTO_HEADER( RedStateAp *state, bool stateInPartition ) +{ + bool anyWritten = IN_TRANS_ACTIONS( state ); + + if ( state->labelNeeded ) + out << "st" << state->id << ":\n"; + + if ( state->toStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. */ + anyWritten = true; + for ( ActionTable::Iter item = state->toStateAction->key; item.lte(); item++ ) + ACTION( out, item->value, state->id, false ); + } + + /* Advance and test buffer pos. */ + if ( state->labelNeeded ) { + if ( cgd->hasEnd ) { + out << + " if ( ++" << P() << " == " << PE() << " )\n" + " goto _out" << state->id << ";\n"; + } + else { + out << + " " << P() << " += 1;\n"; + } + } + + /* Give the state a switch case. */ + out << "case " << state->id << ":\n"; + + if ( state->fromStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. */ + anyWritten = true; + for ( ActionTable::Iter item = state->fromStateAction->key; item.lte(); item++ ) + ACTION( out, item->value, state->id, false ); + } + + if ( anyWritten ) + genLineDirective( out ); + + /* Record the prev state if necessary. 
*/ + if ( state->anyRegCurStateRef() ) + out << " _ps = " << state->id << ";\n"; +} + +std::ostream &SplitCodeGen::STATE_GOTOS( int partition ) +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->partition == partition ) { + if ( st == redFsm->errState ) + STATE_GOTO_ERROR(); + else { + /* We call into the base of the goto which calls back into us + * using virtual functions. Set the current partition rather + * than coding parameter passing throughout. */ + currentPartition = partition; + + /* Writing code above state gotos. */ + GOTO_HEADER( st, st->partition == partition ); + + if ( st->stateCondVect.length() > 0 ) { + out << " _widec = " << GET_KEY() << ";\n"; + emitCondBSearch( st, 1, 0, st->stateCondVect.length() - 1 ); + } + + /* Try singles. */ + if ( st->outSingle.length() > 0 ) + emitSingleSwitch( st ); + + /* Default case is to binary search for the ranges, if that fails then */ + if ( st->outRange.length() > 0 ) + emitRangeBSearch( st, 1, 0, st->outRange.length() - 1 ); + + /* Write the default transition. */ + TRANS_GOTO( st->defTrans, 1 ) << "\n"; + } + } + } + return out; +} + + +std::ostream &SplitCodeGen::PART_TRANS( int partition ) +{ + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + if ( trans->partitionBoundary ) { + out << + "ptr" << trans->id << ":\n"; + + if ( trans->action != 0 ) { + /* If the action contains a next, then we must preload the current + * state since the action may or may not set it. */ + if ( trans->action->anyNextStmt() ) + out << " " << CS() << " = " << trans->targ->id << ";\n"; + + /* Write each action in the list. 
*/ + for ( ActionTable::Iter item = trans->action->key; item.lte(); item++ ) + ACTION( out, item->value, trans->targ->id, false ); + } + + out << + " goto pst" << trans->targ->id << ";\n"; + trans->targ->partitionBoundary = true; + } + } + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->partitionBoundary ) { + out << + " pst" << st->id << ":\n" + " " << CS() << " = " << st->id << ";\n"; + + if ( st->toStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. */ + for ( ActionTable::Iter item = st->toStateAction->key; item.lte(); item++ ) + ACTION( out, item->value, st->id, false ); + genLineDirective( out ); + } + + ptOutLabelUsed = true; + out << " goto _pt_out; \n"; + } + } + return out; +} + +std::ostream &SplitCodeGen::EXIT_STATES( int partition ) +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->partition == partition && st->outNeeded ) { + outLabelUsed = true; + out << " _out" << st->id << ": " << CS() << " = " << + st->id << "; goto _out; \n"; + } + } + return out; +} + + +std::ostream &SplitCodeGen::PARTITION( int partition ) +{ + outLabelUsed = false; + ptOutLabelUsed = false; + + /* Initialize the partition boundaries, which get set during the writing + * of states. 
After the state writing we will */ + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) + trans->partitionBoundary = false; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->partitionBoundary = false; + + out << " " << ALPH_TYPE() << " *p = *_pp, *pe = *_ppe;\n"; + + if ( anyRegCurStateRef() ) + out << " int _ps = 0;\n"; + + if ( anyConditions() ) + out << " " << WIDE_ALPH_TYPE() << " _widec;\n"; + + if ( useAgainLabel() ) { + out << + " goto _resume;\n" + "\n" + "_again:\n" + " switch ( " << CS() << " ) {\n"; + AGAIN_CASES() << + " default: break;\n" + " }\n" + "\n"; + + + if ( cgd->hasEnd ) { + outLabelUsed = true; + out << + " if ( ++" << P() << " == " << PE() << " )\n" + " goto _out;\n"; + + } + else { + out << + " " << P() << " += 1;\n"; + } + + out << + "_resume:\n"; + } + + out << + " switch ( " << CS() << " )\n {\n"; + STATE_GOTOS( partition ); + SWITCH_DEFAULT() << + " }\n"; + PART_TRANS( partition ); + EXIT_STATES( partition ); + + if ( outLabelUsed ) { + out << + "\n" + " _out:\n" + " *_pp = p;\n" + " *_ppe = pe;\n" + " return 0;\n"; + } + + if ( ptOutLabelUsed ) { + out << + "\n" + " _pt_out:\n" + " *_pp = p;\n" + " *_ppe = pe;\n" + " return 1;\n"; + } + + return out; +} + +std::ostream &SplitCodeGen::PART_MAP() +{ + int *partMap = new int[redFsm->stateList.length()]; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + partMap[st->id] = st->partition; + + out << "\t"; + int totalItem = 0; + for ( int i = 0; i < redFsm->stateList.length(); i++ ) { + out << partMap[i]; + if ( i != redFsm->stateList.length() - 1 ) { + out << ", "; + if ( ++totalItem % IALL == 0 ) + out << "\n\t"; + } + } + + delete[] partMap; + return out; +} + +void SplitCodeGen::writeOutData() +{ + out << + "static const int " << START() << " = " << START_STATE_ID() << ";\n" + "\n"; + + if ( cgd->writeFirstFinal ) { + out << + "static const int " << FIRST_FINAL() << " = " << FIRST_FINAL_STATE() << ";\n" + "\n"; + } + + if ( 
cgd->writeErr ) { + out << + "static const int " << ERROR() << " = " << ERROR_STATE() << ";\n" + "\n"; + } + + + OPEN_ARRAY( ARRAY_TYPE(numSplitPartitions), PM() ); + PART_MAP(); + CLOSE_ARRAY() << + "\n"; + + for ( int p = 0; p < redFsm->nParts; p++ ) { + out << "int partition" << p << "( " << ALPH_TYPE() << " **_pp, " << ALPH_TYPE() << + " **_ppe, struct " << FSM_NAME() << " *fsm );\n"; + } + out << "\n"; +} + +std::ostream &SplitCodeGen::ALL_PARTITIONS() +{ + /* compute the format string. */ + int width = 0, high = redFsm->nParts - 1; + while ( high > 0 ) { + width++; + high /= 10; + } + assert( width <= 8 ); + char suffFormat[] = "_%6.6d.c"; + suffFormat[2] = suffFormat[4] = ( '0' + width ); + + for ( int p = 0; p < redFsm->nParts; p++ ) { + char suffix[10]; + sprintf( suffix, suffFormat, p ); + char *fn = fileNameFromStem( cgd->fileName, suffix ); + char *include = fileNameFromStem( cgd->fileName, ".h" ); + + /* Create the filter on the output and open it. */ + output_filter *partFilter = new output_filter; + partFilter->open( fn, ios::out|ios::trunc ); + if ( !outFilter->is_open() ) { + error() << "error opening " << fn << " for writing" << endl; + exit(1); + } + + /* Attach the new file to the output stream. */ + std::streambuf *prev_rdbuf = out.rdbuf( partFilter ); + + out << + "#include \"" << include << "\"\n" + "int partition" << p << "( " << ALPH_TYPE() << " **_pp, " << ALPH_TYPE() << + " **_ppe, struct " << FSM_NAME() << " *fsm )\n" + "{\n"; + PARTITION( p ) << + "}\n\n"; + out.flush(); + + /* Fix the output stream. */ + out.rdbuf( prev_rdbuf ); + } + return out; +} + + +void SplitCodeGen::writeOutExec() +{ + out << + " {\n" + " int _stat = 0;\n"; + + if ( cgd->hasEnd ) { + out << + " if ( " << P() << " == " << PE() << " )\n" + " goto _out;\n"; + } + + out << " goto _resume;\n"; + + /* In this reentry, to-state actions have already been executed on the + * partition-switch exit from the last partition. 
*/ + out << "_reenter:\n"; + + if ( cgd->hasEnd ) { + out << + " if ( ++" << P() << " == " << PE() << " )\n" + " goto _out;\n"; + } + else { + out << + " " << P() << " += 1;\n"; + } + + out << "_resume:\n"; + + out << + " switch ( " << PM() << "[" << CS() << "] ) {\n"; + for ( int p = 0; p < redFsm->nParts; p++ ) { + out << + " case " << p << ":\n" + " _stat = partition" << p << "( &p, &pe, fsm );\n" + " break;\n"; + } + out << + " }\n" + " if ( _stat )\n" + " goto _reenter;\n"; + + if ( cgd->hasEnd ) + out << " _out: {}\n"; + + out << + " }\n"; + + ALL_PARTITIONS(); +} + +void SplitCodeGen::setLabelsNeeded( RedStateAp *fromState, InlineList *inlineList ) +{ /* Walk an inline item list, recursing into children, and mark the target states of goto/call items as needing a label. */ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Goto: case InlineItem::Call: { + /* In split code gen a label is only marked as needed when the target + * state is in the same partition as fromState. */ + if ( fromState->partition == item->targState->partition ){ + /* Mark the target as needing a label. */ + item->targState->labelNeeded = true; + } + break; + } + default: break; + } + + if ( item->children != 0 ) + setLabelsNeeded( fromState, item->children ); + } +} + +void SplitCodeGen::setLabelsNeeded( RedStateAp *fromState, RedTransAp *trans ) +{ /* Mark the labels needed by one transition out of fromState, then by any goto/call found in the transition's actions. */ + /* In the split code gen we don't need labels for transitions across + * partitions. */ + if ( fromState->partition == trans->targ->partition ) { + /* If there is no action with a next statement, then the label will be + * needed. */ + trans->labelNeeded = true; + if ( trans->action == 0 || !trans->action->anyNextStmt() ) + trans->targ->labelNeeded = true; + } + + /* Need labels for states that have goto or calls in action code + * invoked on characters (ie, not from out action code). */ + if ( trans->action != 0 ) { + /* Loop the actions. */ + for ( ActionTable::Iter act = trans->action->key; act.lte(); act++ ) { + /* Get the action and walk it's tree. 
*/ + setLabelsNeeded( fromState, act->value->inlineList ); + } + } +} + +/* Set up labelNeeded flag for each state. */ +void SplitCodeGen::setLabelsNeeded() +{ + /* If we use the _again label, then we the _again switch, which uses all + * labels. */ + if ( useAgainLabel() ) { + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->labelNeeded = true; + } + else { + /* Do not use all labels by default, init all labelNeeded vars to false. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->labelNeeded = false; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) + trans->labelNeeded = false; + + if ( redFsm->errState != 0 && anyLmSwitchError() ) + redFsm->errState->labelNeeded = true; + + /* Walk all transitions and set only those that have targs. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + for ( RedTransList::Iter tel = st->outRange; tel.lte(); tel++ ) + setLabelsNeeded( st, tel->value ); + + for ( RedTransList::Iter tel = st->outSingle; tel.lte(); tel++ ) + setLabelsNeeded( st, tel->value ); + + if ( st->defTrans != 0 ) + setLabelsNeeded( st, st->defTrans ); + } + } + + if ( cgd->hasEnd ) { + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->outNeeded = st->labelNeeded; + } + else { + if ( redFsm->errState != 0 ) + redFsm->errState->outNeeded = true; + + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + /* Any state with a transition in that has a break will need an + * out label. */ + if ( trans->action != 0 && trans->action->anyBreakStmt() ) + trans->targ->outNeeded = true; + } + } +} + diff --git a/rlcodegen/splitcodegen.h b/rlcodegen/splitcodegen.h new file mode 100644 index 0000000..03c2139 --- /dev/null +++ b/rlcodegen/splitcodegen.h @@ -0,0 +1,65 @@ +/* + * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. 
+ * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _SPLITCODEGEN_H +#define _SPLITCODEGEN_H + +#include "ipgotocodegen.h" + +class SplitCodeGen : public IpGotoCodeGen /* Goto-style generator that writes each FSM partition as its own function in its own output file. */ +{ +public: + bool ptOutLabelUsed; /* Reset by PARTITION; set when a "goto _pt_out" is written so that the _pt_out label gets emitted. */ + + std::ostream &PART_MAP(); + std::ostream &EXIT_STATES( int partition ); + std::ostream &PART_TRANS( int partition ); + std::ostream &TRANS_GOTO( RedTransAp *trans, int level ); + void GOTO_HEADER( RedStateAp *state, bool stateInPartition ); + std::ostream &STATE_GOTOS( int partition ); + std::ostream &PARTITION( int partition ); + std::ostream &ALL_PARTITIONS(); + void writeOutData(); + void writeOutExec(); + void writeOutParts(); /* NOTE(review): declared here but no definition appears in splitcodegen.cpp. */ + + void setLabelsNeeded( RedStateAp *fromState, InlineList *inlineList ); + void setLabelsNeeded( RedStateAp *fromState, RedTransAp *trans ); + void setLabelsNeeded(); + + int currentPartition; /* Partition being written; set by STATE_GOTOS and read by TRANS_GOTO. */ +}; + +struct CSplitCodeGen + : public SplitCodeGen, public CCodeGen +{ +}; + +/* + * struct DSplitCodeGen + */ +struct DSplitCodeGen + : public SplitCodeGen, public DCodeGen +{ +}; + + +#endif /* _SPLITCODEGEN_H */ diff --git a/rlcodegen/tabcodegen.cpp b/rlcodegen/tabcodegen.cpp new file mode 100644 index 0000000..b382256 --- /dev/null +++ b/rlcodegen/tabcodegen.cpp @@ -0,0 +1,996 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Eric Ocean 
<eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "rlcodegen.h" +#include "tabcodegen.h" +#include "redfsm.h" +#include "gendata.h" + +/* Determine if we should use indicies or not. */ +void TabCodeGen::calcIndexSize() +{ + int sizeWithInds = 0, sizeWithoutInds = 0; + + /* Calculate cost of using with indicies. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + int totalIndex = st->outSingle.length() + st->outRange.length() + + (st->defTrans == 0 ? 0 : 1); + sizeWithInds += arrayTypeSize(maxIndex) * totalIndex; + } + sizeWithInds += arrayTypeSize(maxState) * redFsm->transSet.length(); + if ( anyActions() ) + sizeWithInds += arrayTypeSize(maxActionLoc) * redFsm->transSet.length(); + + /* Calculate the cost of not using indicies. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + int totalIndex = st->outSingle.length() + st->outRange.length() + + (st->defTrans == 0 ? 0 : 1); + sizeWithoutInds += arrayTypeSize(maxState) * totalIndex; + if ( anyActions() ) + sizeWithoutInds += arrayTypeSize(maxActionLoc) * totalIndex; + } + + /* If using indicies reduces the size, use them. 
*/ + useIndicies = sizeWithInds < sizeWithoutInds; +} + +std::ostream &TabCodeGen::TO_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->location+1; + out << act; + return out; +} + +std::ostream &TabCodeGen::FROM_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->location+1; + out << act; + return out; +} + +std::ostream &TabCodeGen::EOF_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->location+1; + out << act; + return out; +} + + +std::ostream &TabCodeGen::TRANS_ACTION( RedTransAp *trans ) +{ + /* If there are actions, emit them. Otherwise emit zero. */ + int act = 0; + if ( trans->action != 0 ) + act = trans->action->location+1; + out << act; + return out; +} + +std::ostream &TabCodeGen::TO_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numToStateRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +std::ostream &TabCodeGen::FROM_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numFromStateRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +std::ostream &TabCodeGen::EOF_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. 
*/ + for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numEofRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, true ); + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + + +std::ostream &TabCodeGen::ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numTransRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +std::ostream &TabCodeGen::COND_OFFSETS() +{ + out << "\t"; + int totalStateNum = 0, curKeyOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write the key offset. */ + out << curKeyOffset; + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + + /* Move the key offset ahead. */ + curKeyOffset += st->stateCondList.length(); + } + out << "\n"; + return out; +} + +std::ostream &TabCodeGen::KEY_OFFSETS() +{ + out << "\t"; + int totalStateNum = 0, curKeyOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write the key offset. */ + out << curKeyOffset; + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + + /* Move the key offset ahead. */ + curKeyOffset += st->outSingle.length() + st->outRange.length()*2; + } + out << "\n"; + return out; +} + + +std::ostream &TabCodeGen::INDEX_OFFSETS() +{ + out << "\t"; + int totalStateNum = 0, curIndOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write the index offset. 
*/ + out << curIndOffset; + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + + /* Move the index offset ahead. */ + curIndOffset += st->outSingle.length() + st->outRange.length(); + if ( st->defTrans != 0 ) + curIndOffset += 1; + } + out << "\n"; + return out; +} + +std::ostream &TabCodeGen::COND_LENS() +{ + out << "\t"; + int totalStateNum = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write singles length. */ + out << st->stateCondList.length(); + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + + +std::ostream &TabCodeGen::SINGLE_LENS() +{ + out << "\t"; + int totalStateNum = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write singles length. */ + out << st->outSingle.length(); + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +std::ostream &TabCodeGen::RANGE_LENS() +{ + out << "\t"; + int totalStateNum = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Emit length of range index. */ + out << st->outRange.length(); + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +std::ostream &TabCodeGen::TO_STATE_ACTIONS() +{ + out << "\t"; + int totalStateNum = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write any eof action. */ + TO_STATE_ACTION(st); + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +std::ostream &TabCodeGen::FROM_STATE_ACTIONS() +{ + out << "\t"; + int totalStateNum = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write any eof action. 
*/ + FROM_STATE_ACTION(st); + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +std::ostream &TabCodeGen::EOF_ACTIONS() +{ + out << "\t"; + int totalStateNum = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write any eof action. */ + EOF_ACTION(st); + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +std::ostream &TabCodeGen::COND_KEYS() +{ + out << '\t'; + int totalTrans = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Loop the state's transitions. */ + for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) { + /* Lower key. */ + out << KEY( sc->lowKey ) << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + + /* Upper key. */ + out << KEY( sc->highKey ) << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + } + + /* Output one last number so we don't have to figure out when the last + * entry is and avoid writing a comma. */ + out << 0 << "\n"; + return out; +} + +std::ostream &TabCodeGen::COND_SPACES() +{ + out << '\t'; + int totalTrans = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Loop the state's transitions. */ + for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) { + /* Cond Space id. */ + out << sc->condSpace->condSpaceId << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + } + + /* Output one last number so we don't have to figure out when the last + * entry is and avoid writing a comma. */ + out << 0 << "\n"; + return out; +} + +std::ostream &TabCodeGen::KEYS() +{ + out << '\t'; + int totalTrans = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Loop the singles. 
*/ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + out << KEY( stel->lowKey ) << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + + /* Loop the state's transitions. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + /* Lower key. */ + out << KEY( rtel->lowKey ) << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + + /* Upper key. */ + out << KEY( rtel->highKey ) << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + } + + /* Output one last number so we don't have to figure out when the last + * entry is and avoid writing a comma. */ + out << 0 << "\n"; + return out; +} + +std::ostream &TabCodeGen::INDICIES() +{ + int totalTrans = 0; + out << '\t'; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + out << stel->value->id << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + out << rtel->value->id << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + out << st->defTrans->id << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + } + + /* Output one last number so we don't have to figure out when the last + * entry is and avoid writing a comma. */ + out << 0 << "\n"; + return out; +} + +std::ostream &TabCodeGen::TRANS_TARGS() +{ + int totalTrans = 0; + out << '\t'; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + out << trans->targ->id << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + + /* Walk the ranges. 
*/ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + out << trans->targ->id << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + + /* The state's default target state. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + out << trans->targ->id << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + } + + /* Output one last number so we don't have to figure out when the last + * entry is and avoid writing a comma. */ + out << 0 << "\n"; + return out; +} + + +std::ostream &TabCodeGen::TRANS_ACTIONS() +{ + int totalTrans = 0; + out << '\t'; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + TRANS_ACTION( trans ) << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + TRANS_ACTION( trans ) << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + TRANS_ACTION( trans ) << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + } + + /* Output one last number so we don't have to figure out when the last + * entry is and avoid writing a comma. */ + out << 0 << "\n"; + return out; +} + +std::ostream &TabCodeGen::TRANS_TARGS_WI() +{ + /* Transitions must be written ordered by their id. */ + RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()]; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) + transPtrs[trans->id] = trans; + + /* Keep a count of the num of items in the array written. */ + out << '\t'; + int totalStates = 0; + for ( int t = 0; t < redFsm->transSet.length(); t++ ) { + /* Write out the target state. 
*/ + RedTransAp *trans = transPtrs[t]; + out << trans->targ->id; + if ( t < redFsm->transSet.length()-1 ) { + out << ", "; + if ( ++totalStates % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + delete[] transPtrs; + return out; +} + + +std::ostream &TabCodeGen::TRANS_ACTIONS_WI() +{ + /* Transitions must be written ordered by their id. */ + RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()]; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) + transPtrs[trans->id] = trans; + + /* Keep a count of the num of items in the array written. */ + out << '\t'; + int totalAct = 0; + for ( int t = 0; t < redFsm->transSet.length(); t++ ) { + /* Write the function for the transition. */ + RedTransAp *trans = transPtrs[t]; + TRANS_ACTION( trans ); + if ( t < redFsm->transSet.length()-1 ) { + out << ", "; + if ( ++totalAct % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + delete[] transPtrs; + return out; +} + +void TabCodeGen::LOCATE_TRANS() +{ + out << + " _keys = " << ARR_OFF( K(), KO() + "[" + CS() + "]" ) << ";\n" + " _trans = " << IO() << "[" << CS() << "];\n" + "\n" + " _klen = " << SL() << "[" << CS() << "];\n" + " if ( _klen > 0 ) {\n" + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_lower = _keys;\n" + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_mid;\n" + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_upper = _keys + _klen - 1;\n" + " while (1) {\n" + " if ( _upper < _lower )\n" + " break;\n" + "\n" + " _mid = _lower + ((_upper-_lower) >> 1);\n" + " if ( " << GET_WIDE_KEY() << " < *_mid )\n" + " _upper = _mid - 1;\n" + " else if ( " << GET_WIDE_KEY() << " > *_mid )\n" + " _lower = _mid + 1;\n" + " else {\n" + " _trans += (_mid - _keys);\n" + " goto _match;\n" + " }\n" + " }\n" + " _keys += _klen;\n" + " _trans += _klen;\n" + " }\n" + "\n" + " _klen = " << RL() << "[" << CS() << "];\n" + " if ( _klen > 0 ) {\n" + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_lower = _keys;\n" 
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_mid;\n" + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_upper = _keys + (_klen<<1) - 2;\n" + " while (1) {\n" + " if ( _upper < _lower )\n" + " break;\n" + "\n" + " _mid = _lower + (((_upper-_lower) >> 1) & ~1);\n" + " if ( " << GET_WIDE_KEY() << " < _mid[0] )\n" + " _upper = _mid - 2;\n" + " else if ( " << GET_WIDE_KEY() << " > _mid[1] )\n" + " _lower = _mid + 2;\n" + " else {\n" + " _trans += ((_mid - _keys)>>1);\n" + " goto _match;\n" + " }\n" + " }\n" + " _trans += _klen;\n" + " }\n" + "\n"; +} + +void TabCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish ) +{ + ret << "{" << CS() << " = " << gotoDest << "; " << + CTRL_FLOW() << "goto _again;}"; +} + +void TabCodeGen::GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ) +{ + ret << "{" << CS() << " = ("; + INLINE_LIST( ret, ilItem->children, 0, inFinish ); + ret << "); " << CTRL_FLOW() << "goto _again;}"; +} + +void TabCodeGen::CURS( ostream &ret, bool inFinish ) +{ + ret << "(_ps)"; +} + +void TabCodeGen::TARGS( ostream &ret, bool inFinish, int targState ) +{ + ret << "(" << CS() << ")"; +} + +void TabCodeGen::NEXT( ostream &ret, int nextDest, bool inFinish ) +{ + ret << CS() << " = " << nextDest << ";"; +} + +void TabCodeGen::NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ) +{ + ret << CS() << " = ("; + INLINE_LIST( ret, ilItem->children, 0, inFinish ); + ret << ");"; +} + +void TabCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; " << CS() << " = " << + callDest << "; " << CTRL_FLOW() << "goto _again;}"; +} + +void TabCodeGen::CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish ) +{ + ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; " << CS() << " = ("; + INLINE_LIST( ret, ilItem->children, targState, inFinish ); + ret << "); " << CTRL_FLOW() << "goto _again;}"; +} + +void 
/* Write the code for a break statement into ret: a jump to the _out label.
 * Also records that the label is referenced; writeOutExec() only emits the
 * _out label when outLabelUsed is set. targState is not used here. */
void TabCodeGen::BREAK( ostream &ret, int targState )
{
	outLabelUsed = true;
	ret << CTRL_FLOW() << "goto _out;";
}
+ + if ( anyToStateActions() ) { + OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), TSA() ); + TO_STATE_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + if ( anyFromStateActions() ) { + OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), FSA() ); + FROM_STATE_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + if ( anyEofActions() ) { + OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), EA() ); + EOF_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + STATIC_VAR( "int", START() ) << " = " << START_STATE_ID() << ";\n" + "\n"; + + if ( cgd->writeFirstFinal ) { + STATIC_VAR( "int" , FIRST_FINAL() ) << " = " << FIRST_FINAL_STATE() << ";\n" + "\n"; + } + + if ( cgd->writeErr ) { + STATIC_VAR( "int", ERROR() ) << " = " << ERROR_STATE() << ";\n" + "\n"; + } +} + +void TabCodeGen::COND_TRANSLATE() +{ + out << + " _widec = " << GET_KEY() << ";\n" + " _klen = " << CL() << "[" << CS() << "];\n" + " _keys = " << ARR_OFF( CK(), "(" + CO() + "[" + CS() + "]*2)" ) << ";\n" + " if ( _klen > 0 ) {\n" + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_lower = _keys;\n" + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_mid;\n" + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_upper = _keys + (_klen<<1) - 2;\n" + " while (1) {\n" + " if ( _upper < _lower )\n" + " break;\n" + "\n" + " _mid = _lower + (((_upper-_lower) >> 1) & ~1);\n" + " if ( " << GET_WIDE_KEY() << " < _mid[0] )\n" + " _upper = _mid - 2;\n" + " else if ( " << GET_WIDE_KEY() << " > _mid[1] )\n" + " _lower = _mid + 2;\n" + " else {\n" + " switch ( " << C() << "[" << CO() << "[" << CS() << "]" + " + ((_mid - _keys)>>1)] ) {\n"; + + for ( CondSpaceList::Iter csi = cgd->condSpaceList; csi.lte(); csi++ ) { + CondSpace *condSpace = csi; + out << " case " << condSpace->condSpaceId << ": {\n"; + out << TABS(2) << "_widec = " << CAST(WIDE_ALPH_TYPE()) << "(" << + KEY(condSpace->baseKey) << " + (" << GET_KEY() << + " - " << KEY(keyOps->minKey) << "));\n"; + + for ( CondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + out << TABS(2) << "if ( "; + 
CONDITION( out, *csi ); + Size condValOffset = ((1 << csi.pos()) * keyOps->alphSize()); + out << " ) _widec += " << condValOffset << ";\n"; + } + + out << + " break;\n" + " }\n"; + } + + SWITCH_DEFAULT(); + + out << + " }\n" + " break;\n" + " }\n" + " }\n" + " }\n" + "\n"; +} + +void TabCodeGen::writeOutExec() +{ + outLabelUsed = false; + + out << + " {\n" + " int _klen"; + + if ( anyRegCurStateRef() ) + out << ", _ps"; + + out << + ";\n" + " " << UINT() << " _trans;\n"; + + if ( anyConditions() ) + out << " " << WIDE_ALPH_TYPE() << " _widec;\n"; + + if ( anyToStateActions() || anyRegActions() || anyFromStateActions() ) { + out << + " " << PTR_CONST() << ARRAY_TYPE(maxActArrItem) << POINTER() << "_acts;\n" + " " << UINT() << " _nacts;\n"; + } + + out << + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_keys;\n" + "\n"; + + if ( cgd->hasEnd ) { + outLabelUsed = true; + out << + " if ( " << P() << " == " << PE() << " )\n" + " goto _out;\n"; + } + + out << "_resume:\n"; + + if ( redFsm->errState != 0 ) { + outLabelUsed = true; + out << + " if ( " << CS() << " == " << redFsm->errState->id << " )\n" + " goto _out;\n"; + } + + if ( anyFromStateActions() ) { + out << + " _acts = " << ARR_OFF( A(), FSA() + "[" + CS() + "]" ) << ";\n" + " _nacts = " << CAST(UINT()) << " *_acts++;\n" + " while ( _nacts-- > 0 ) {\n" + " switch ( *_acts++ ) {\n"; + FROM_STATE_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + "\n"; + } + + if ( anyConditions() ) + COND_TRANSLATE(); + + LOCATE_TRANS(); + + out << "_match:\n"; + + if ( anyRegCurStateRef() ) + out << " _ps = " << CS() << ";\n"; + + if ( useIndicies ) + out << " _trans = " << I() << "[_trans];\n"; + + out << + " " << CS() << " = " << TT() << "[_trans];\n" + "\n"; + + if ( anyRegActions() ) { + out << + " if ( " << TA() << "[_trans] == 0 )\n" + " goto _again;\n" + "\n" + " _acts = " << ARR_OFF( A(), TA() + "[_trans]" ) << ";\n" + " _nacts = " << CAST(UINT()) << " *_acts++;\n" + " while ( _nacts-- > 0 )\n {\n" + " 
switch ( *_acts++ )\n {\n"; + ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + "\n"; + } + + if ( anyRegActions() || anyActionGotos() || anyActionCalls() || anyActionRets() ) + out << "_again:\n"; + + if ( anyToStateActions() ) { + out << + " _acts = " << ARR_OFF( A(), TSA() + "[" + CS() + "]" ) << ";\n" + " _nacts = " << CAST(UINT()) << " *_acts++;\n" + " while ( _nacts-- > 0 ) {\n" + " switch ( *_acts++ ) {\n"; + TO_STATE_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + "\n"; + } + + if ( cgd->hasEnd ) { + out << + " if ( ++" << P() << " != " << PE() << " )\n" + " goto _resume;\n"; + } + else { + out << + " " << P() << " += 1;\n" + " goto _resume;\n"; + } + + if ( outLabelUsed ) + out << " _out: {}\n"; + + out << " }\n"; +} + + +void TabCodeGen::writeOutEOF() +{ + if ( anyEofActions() ) { + out << + " {\n" + " " << PTR_CONST() << ARRAY_TYPE(maxActArrItem) << POINTER() << "_acts = " << + ARR_OFF( A(), EA() + "[" + CS() + "]" ) << ";\n" + " " << UINT() << " _nacts = " << CAST(UINT()) << " *_acts++;\n" + " while ( _nacts-- > 0 ) {\n" + " switch ( *_acts++ ) {\n"; + EOF_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + " }\n" + "\n"; + } +} diff --git a/rlcodegen/tabcodegen.h b/rlcodegen/tabcodegen.h new file mode 100644 index 0000000..34af924 --- /dev/null +++ b/rlcodegen/tabcodegen.h @@ -0,0 +1,110 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Eric Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
/*
 * TabCodeGen
 *
 * Code generator for the table-driven output style. The generated machine
 * consists of static arrays written by writeOutData() and a loop written by
 * writeOutExec() that looks transitions up in those arrays.
 */
class TabCodeGen : virtual public FsmCodeGen
{
public:
	virtual ~TabCodeGen() { }

	/* Write the static tables and the start/first-final/error constants. */
	virtual void writeOutData();

	/* Write the execution loop that consumes input and follows transitions. */
	virtual void writeOutExec();

protected:
	/* Writers for the case bodies of the action switches that the generated
	 * code runs; they are invoked by writeOutExec() and writeOutEOF(). */
	std::ostream &TO_STATE_ACTION_SWITCH();
	std::ostream &FROM_STATE_ACTION_SWITCH();
	std::ostream &EOF_ACTION_SWITCH();
	std::ostream &ACTION_SWITCH();

	/* Writers for the contents of the individual static arrays. Each is
	 * called by writeOutData() between OPEN_ARRAY and CLOSE_ARRAY and
	 * returns the output stream. */
	std::ostream &COND_KEYS();
	std::ostream &COND_SPACES();
	std::ostream &KEYS();
	std::ostream &INDICIES();
	std::ostream &COND_OFFSETS();
	std::ostream &KEY_OFFSETS();
	std::ostream &INDEX_OFFSETS();
	std::ostream &COND_LENS();
	std::ostream &SINGLE_LENS();
	std::ostream &RANGE_LENS();
	std::ostream &TO_STATE_ACTIONS();
	std::ostream &FROM_STATE_ACTIONS();
	std::ostream &EOF_ACTIONS();

	/* Per-state layout: targets/actions listed state by state. Used when
	 * useIndicies is off. */
	std::ostream &TRANS_TARGS();
	std::ostream &TRANS_ACTIONS();

	/* Indexed layout ("WI"): one entry per transition, ordered by
	 * transition id. Used when useIndicies is on. */
	std::ostream &TRANS_TARGS_WI();
	std::ostream &TRANS_ACTIONS_WI();

	/* Write the generated binary searches over the current state's single
	 * keys and then its key ranges; a hit jumps to the _match label. */
	void LOCATE_TRANS();

	/* Write the generated code that computes the widened key _widec from
	 * the current key and the state's conditions. */
	void COND_TRANSLATE();

	/* Writers for the inline control-flow statements that can appear in
	 * user actions. Each appends the generated statement to ret. */
	void GOTO( ostream &ret, int gotoDest, bool inFinish );
	void CALL( ostream &ret, int callDest, int targState, bool inFinish );
	void NEXT( ostream &ret, int nextDest, bool inFinish );
	void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
	void NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
	void CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish );
	void CURS( ostream &ret, bool inFinish );
	void TARGS( ostream &ret, bool inFinish, int targState );
	void RET( ostream &ret, bool inFinish );
	void BREAK( ostream &ret, int targState );

	/* Writers for a single table entry describing the action(s) attached to
	 * a state or transition. Virtual so derived generators can override. */
	virtual std::ostream &TO_STATE_ACTION( RedStateAp *state );
	virtual std::ostream &FROM_STATE_ACTION( RedStateAp *state );
	virtual std::ostream &EOF_ACTION( RedStateAp *state );
	virtual std::ostream &TRANS_ACTION( RedTransAp *trans );
	virtual void calcIndexSize();

	/* Write the block that runs the current state's EOF actions, if any
	 * state has them. */
	virtual void writeOutEOF();
};
We will borrow + * them for error reporting. */ +//extern YYSTYPE *yylval; +//extern YYLTYPE *yylloc; + +//int yylex( YYSTYPE *, YYLTYPE *); +void scannerInit(); + +extern char *lelNames[]; + +struct LangEl; + +struct Parser +{ + %%{ + parser Parser; + + token TAG_unknown, TAG_ragel, TAG_ragel_def, TAG_host, TAG_state_list, + TAG_state, TAG_trans_list, TAG_t, TAG_machine, TAG_start_state, + TAG_action_list, TAG_action_table_list, TAG_action, + TAG_action_table, TAG_alphtype, TAG_element, TAG_getkey, + TAG_state_actions, TAG_entry_points, TAG_sub_action, + TAG_cond_space_list, TAG_cond_space, TAG_cond_list, TAG_c; + + # Inline block tokens. + token TAG_text, TAG_goto, TAG_call, TAG_next, TAG_goto_expr, + TAG_call_expr, TAG_next_expr, TAG_ret, TAG_pchar, TAG_char, + TAG_hold, TAG_exec, TAG_holdte, TAG_execte, TAG_curs, TAG_targs, + TAG_entry, TAG_data, TAG_lm_switch, TAG_init_act, TAG_set_act, + TAG_set_tokend, TAG_get_tokend, TAG_init_tokstart, + TAG_set_tokstart, TAG_write, TAG_curstate, TAG_access, TAG_break, + TAG_option; + + interface; + }%% + + Parser( char *fileName ) + : fileName(fileName), sourceFileName(0) + { + //pd = new ParseData( fileName, sectionName, sectionLoc ); + } + + int token( int id ); + int token( int tokenId, Token &token ); + int token( XMLTag *tag, int col, int line ); + + /* Report an error encountered by the parser. */ + ostream &error(); + ostream &error( const InputLoc &loc ); + ostream &parser_error( int tokId, Token &token ); + + /* The name of the root section, this does not change during an include. */ + char *fileName; + + /* Collected during parsing. 
*/ + char *sourceFileName; + char *attrKey; + char *attrValue; + int curAction; + int curActionTable; + int curTrans; + int curState; + int curCondSpace; + int curStateCond; + + CodeGenMap codeGenMap; +}; + +#endif /* _XMLPARSE_H */ diff --git a/rlcodegen/xmlparse.kl b/rlcodegen/xmlparse.kl new file mode 100644 index 0000000..7e2dbf0 --- /dev/null +++ b/rlcodegen/xmlparse.kl @@ -0,0 +1,875 @@ +/* + * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "xmlparse.h" +#include "rlcodegen.h" +#include "common.h" +#include "gendata.h" +#include <iostream> + +using std::cout; +using std::ostream; +using std::istream; +using std::cerr; +using std::endl; + +Key readKey( char *td, char **end ); +long readOffsetPtr( char *td, char **end ); +unsigned long readLength( char *td ); + +%%{ + +parser Parser; + +include "xmlparse.kh"; + +start: tag_ragel; +start: + final { + /* If we get no input the assumption is that the frontend died and + * emitted an error. 
# A block of host-language text that is passed through to the output. The
# line attribute is handed to lineDirective() ahead of the text; both are
# written only when generating code.
host:
	TAG_host '/' TAG_host
	final {
		Attribute *lineAttr = $1->tag->findAttr( "line" );
		if ( lineAttr == 0 )
			xml_error($1->loc) << "tag <host> requires a line attribute" << endl;
		else {
			/* Parsed with strtol like every other numeric field in this
			 * grammar; atoi has undefined behaviour on out-of-range input. */
			int line = strtol( lineAttr->value, 0, 10 );
			if ( outputFormat == OutCode )
				lineDirective( *outStream, sourceFileName, line );
		}

		/* The host text is still passed through when the line attribute is
		 * missing. */
		if ( outputFormat == OutCode )
			*outStream << $3->tag->content;
	};
# A write statement. The what attribute selects which part of the machine
# is to be written; the options collected by write_option_list have already
# been folded into cgd->writeOps.
tag_write: TAG_write write_option_list '/' TAG_write
	final {
		Attribute *what = $1->tag->findAttr( "what" );
		if ( what == 0 ) {
			xml_error($1->loc) << "tag <write> requires a what attribute" << endl;
		}
		else {
			if ( strcmp( what->value, "data" ) == 0 )
				cgd->writeData = true;
			else if ( strcmp( what->value, "init" ) == 0 )
				cgd->writeInit = true;
			else if ( strcmp( what->value, "exec" ) == 0 )
				cgd->writeExec = true;
			else if ( strcmp( what->value, "eof" ) == 0 )
				cgd->writeEOF = true;
			else {
				/* Mirror tag_option: say so rather than silently writing
				 * nothing for a value that is not understood. */
				warning() << "unrecognized write type" << endl;
			}
		}
	};
# One named entry point. The tag content is the decimal id of the state the
# name refers to.
tag_entry: TAG_entry '/' TAG_entry
	final {
		Attribute *nameAttr = $1->tag->findAttr( "name" );
		if ( nameAttr != 0 ) {
			/* Register the name against the state id given in the content. */
			unsigned long stateId = strtoul( $3->tag->content, 0, 10 );
			cgd->addEntryPoint( nameAttr->value, stateId );
		}
		else {
			xml_error($1->loc) << "tag <entry_points>::<entry> "
					"requires a name attribute" << endl;
		}
	};
# One state. The mere presence of a final attribute marks the state as
# final; the attribute's value is not examined. States are numbered in the
# order they are reduced.
tag_state: TAG_state state_item_list '/' TAG_state
	final {
		Attribute *finalAttr = $1->tag->findAttr( "final" );
		if ( finalAttr != 0 ) {
			cgd->setFinal( curState );
		}

		/* Move on to the next state slot. */
		curState++;
	};
# One action definition. The line and col attributes are mandatory, the name
# attribute is optional; the action body is the inline list collected
# between the tags.
tag_action: TAG_action inline_list '/' TAG_action
	final {
		Attribute *lineAttr = $1->tag->findAttr( "line" );
		Attribute *colAttr = $1->tag->findAttr( "col" );
		Attribute *nameAttr = $1->tag->findAttr( "name" );

		if ( lineAttr != 0 && colAttr != 0 ) {
			unsigned long line = strtoul( lineAttr->value, 0, 10 );
			unsigned long col = strtoul( colAttr->value, 0, 10 );

			/* Actions without a name attribute are passed a null name. */
			char *name = ( nameAttr != 0 ) ? nameAttr->value : 0;

			cgd->newAction( curAction++, name, line, col, $2->inlineList );
		}
		else {
			xml_error($1->loc) << "tag <action> requires a line and col attributes" << endl;
		}
	};
*/ + $$->inlineList = new InlineList; + }; + +nonterm inline_item_type +{ + InlineItem *inlineItem; +}; + +nonterm inline_item uses inline_item_type; + +inline_item: tag_text final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_goto final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_call final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_next final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_goto_expr final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_call_expr final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_next_expr final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_ret final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_break final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_pchar final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_char final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_hold final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_exec final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_holdte final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_execte final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_curs final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_targs final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_il_entry final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_init_tokstart final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_init_act final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_get_tokend final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_set_tokstart final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_set_tokend final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_set_act final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_sub_action final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_lm_switch final { $$->inlineItem = $1->inlineItem; }; + +nonterm tag_text uses 
# A goto statement inside an action. The tag content is the decimal id of
# the destination.
tag_goto: TAG_goto '/' TAG_goto
	final {
		const int dest = strtol( $3->tag->content, 0, 10 );

		InlineItem *item = new InlineItem( InputLoc(), InlineItem::Goto );
		item->targId = dest;
		$$->inlineItem = item;
	};
InlineItem::GotoExpr ); + $$->inlineItem->children = $2->inlineList; + }; + +tag_call_expr: TAG_call_expr inline_list '/' TAG_call_expr + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::CallExpr ); + $$->inlineItem->children = $2->inlineList; + }; + +tag_next_expr: TAG_next_expr inline_list '/' TAG_next_expr + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::NextExpr ); + $$->inlineItem->children = $2->inlineList; + }; + +tag_ret: TAG_ret '/' TAG_ret + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Ret ); + }; + +tag_break: TAG_break '/' TAG_break + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Break ); + }; + +tag_pchar: TAG_pchar '/' TAG_pchar + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::PChar ); + }; + +tag_char: TAG_char '/' TAG_char + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Char ); + }; + +tag_hold: TAG_hold '/' TAG_hold + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Hold ); + }; + +tag_exec: TAG_exec inline_list '/' TAG_exec + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Exec ); + $$->inlineItem->children = $2->inlineList; + }; + +tag_holdte: TAG_holdte '/' TAG_holdte + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::HoldTE ); + }; + +tag_execte: TAG_execte inline_list '/' TAG_execte + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::ExecTE ); + $$->inlineItem->children = $2->inlineList; + }; + +tag_curs: TAG_curs '/' TAG_curs + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Curs ); + }; + +tag_targs: TAG_targs '/' TAG_targs + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Targs ); + }; + +tag_il_entry: TAG_entry '/' TAG_entry + final { + int targ = strtol( $3->tag->content, 0, 10 ); + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Entry ); + $$->inlineItem->targId = targ; + }; + +tag_init_tokstart: 
TAG_init_tokstart '/' TAG_init_tokstart + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::LmInitTokStart ); + }; + +tag_init_act: TAG_init_act '/' TAG_init_act + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::LmInitAct ); + }; + +tag_get_tokend: TAG_get_tokend '/' TAG_get_tokend + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::LmGetTokEnd ); + }; + +tag_set_tokstart: TAG_set_tokstart '/' TAG_set_tokstart + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::LmSetTokStart ); + cgd->hasLongestMatch = true; + }; + +tag_set_tokend: TAG_set_tokend '/' TAG_set_tokend + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ); + $$->inlineItem->offset = strtol( $3->tag->content, 0, 10 ); + }; + +tag_set_act: TAG_set_act '/' TAG_set_act + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::LmSetActId ); + $$->inlineItem->lmId = strtol( $3->tag->content, 0, 10 ); + }; + +tag_sub_action: TAG_sub_action inline_list '/' TAG_sub_action + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::SubAction ); + $$->inlineItem->children = $2->inlineList; + }; + +# Action switches. 
+tag_lm_switch: TAG_lm_switch lm_action_list '/' TAG_lm_switch + final { + bool handlesError = false; + Attribute *handlesErrorAttr = $1->tag->findAttr( "handles_error" ); + if ( handlesErrorAttr != 0 ) + handlesError = true; + + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::LmSwitch ); + $$->inlineItem->children = $2->inlineList; + $$->inlineItem->handlesError = handlesError; + }; + +nonterm lm_action_list +{ + InlineList *inlineList; +}; + +lm_action_list: lm_action_list tag_inline_action + final { + $$->inlineList = $1->inlineList; + $$->inlineList->append( $2->inlineItem ); + }; +lm_action_list: + final { + $$->inlineList = new InlineList; + }; + +nonterm tag_inline_action uses inline_item_type; + +tag_inline_action: TAG_sub_action inline_list '/' TAG_sub_action + final { + $$->inlineItem = new InlineItem( InputLoc(), InlineItem::SubAction ); + $$->inlineItem->children = $2->inlineList; + + Attribute *idAttr = $1->tag->findAttr( "id" ); + if ( idAttr != 0 ) { + unsigned long id = strtoul( idAttr->value, 0, 10 ); + $$->inlineItem->lmId = id; + } + }; + +# +# Lists of Actions. +# + +tag_action_table_list: + tag_action_table_list_head action_table_list '/' TAG_action_table_list; + +tag_action_table_list_head: TAG_action_table_list + final { + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) { + xml_error($1->loc) << "tag <action_table_list> requires " + "a length attribute" << endl; + } + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + cgd->initActionTableList( length ); + curActionTable = 0; + } + }; + +action_table_list: action_table_list tag_action_table; +action_table_list: ; + +tag_action_table: TAG_action_table '/' TAG_action_table + final { + /* Find the length of the action table. 
*/ + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) + xml_error($1->loc) << "tag <at> requires a length attribute" << endl; + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + + /* Collect the action table. */ + RedAction *redAct = cgd->allActionTables + curActionTable; + redAct->actListId = curActionTable; + redAct->key.setAsNew( length ); + char *ptr = $3->tag->content; + int pos = 0; + while ( *ptr != 0 ) { + unsigned long actionId = strtoul( ptr, &ptr, 10 ); + redAct->key[pos].key = 0; + redAct->key[pos].value = cgd->allActions+actionId; + pos += 1; + } + + /* Insert into the action table map. */ + cgd->redFsm->actionMap.insert( redAct ); + } + + curActionTable += 1; + }; + +# +# Conditions. +# + +tag_cond_space_list: tag_cond_space_list_head cond_space_list '/' TAG_cond_space_list; + +tag_cond_space_list_head: TAG_cond_space_list + final { + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) { + xml_error($1->loc) << "tag <cond_space_list> " + "requires a length attribute" << endl; + } + else { + ulong length = readLength( lengthAttr->value ); + cgd->initCondSpaceList( length ); + curCondSpace = 0; + } + }; + +cond_space_list: cond_space_list tag_cond_space; +cond_space_list: tag_cond_space; + +tag_cond_space: TAG_cond_space '/' TAG_cond_space + final { + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + Attribute *idAttr = $1->tag->findAttr( "id" ); + if ( lengthAttr == 0 ) + xml_error($1->loc) << "tag <cond_space> requires a length attribute" << endl; + else { + if ( idAttr == 0 ) /* idAttr->value is read below, so the id attribute must be present */ + xml_error($1->loc) << "tag <cond_space> requires an id attribute" << endl; + else { + unsigned long condSpaceId = strtoul( idAttr->value, 0, 10 ); + ulong length = readLength( lengthAttr->value ); + + char *td = $3->tag->content; + Key baseKey = readKey( td, &td ); + + cgd->newCondSpace( curCondSpace, condSpaceId, baseKey ); + for ( ulong a = 0; a < length; a++ ) { + long actionOffset = 
readOffsetPtr( td, &td ); + cgd->condSpaceItem( curCondSpace, actionOffset ); + } + curCondSpace += 1; + } + } + }; + +}%% + +unsigned long readLength( char *td ) +{ + return strtoul( td, 0, 10 ); +} + +Key readKey( char *td, char **end ) +{ + if ( keyOps->isSigned ) + return Key( strtol( td, end, 10 ) ); + else + return Key( strtoul( td, end, 10 ) ); +} + +long readOffsetPtr( char *td, char **end ) +{ + while ( *td == ' ' || *td == '\t' ) + td++; + + if ( *td == 'x' ) { + if ( end != 0 ) + *end = td + 1; + return -1; + } + + return strtol( td, end, 10 ); +} + +ostream &Parser::error() +{ + gblErrorCount += 1; + cerr << PROGNAME ": "; + return cerr; +} + +ostream &Parser::error( const InputLoc &loc ) +{ + gblErrorCount += 1; + assert( fileName != 0 ); + cerr << fileName << ":" << loc.line << ":" << loc.col << ": "; + return cerr; +} + +ostream &Parser::parser_error( int tokId, Token &token ) +{ + gblErrorCount += 1; + assert( fileName != 0 ); + cerr << fileName << ":" << token.loc.line << ":" << token.loc.col; + if ( token.tag != 0 ) { + if ( token.tag->tagId == 0 ) + cerr << ": at unknown tag"; + else + cerr << ": at tag <" << token.tag->tagId->name << ">"; + } + cerr << ": "; + + return cerr; +} + +int Parser::token( int tokenId, Token &tok ) +{ + int res = parseLangEl( tokenId, tok ); + if ( res < 0 ) { + parser_error( tokenId, tok ) << "parse error" << endl; + exit(1); + } + return res; +} + +int Parser::token( int tokenId ) +{ + Token tok; + tok.tag = 0; + return token( tokenId, tok ); +} + +int Parser::token( XMLTag *tag, int col, int line ) +{ + Token tok; + tok.loc.col = col; + tok.loc.line = line; + tok.tag = tag; + + if ( tag->type == XMLTag::Close ) { + int res = token( '/', tok ); + if ( res < 0 ) + return res; + } + + tok.tag = tag; + return token( tag->tagId != 0 ? 
tag->tagId->id : TAG_unknown, tok ); +} diff --git a/rlcodegen/xmlparse.y b/rlcodegen/xmlparse.y new file mode 100644 index 0000000..a837c87 --- /dev/null +++ b/rlcodegen/xmlparse.y @@ -0,0 +1,978 @@ +/* + * Copyright 2005-2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +%{ + +#include <iostream> +#include <stdlib.h> +#include <limits.h> +#include <errno.h> +#include "rlcodegen.h" +#include "vector.h" +#include "xmlparse.h" +#include "gendata.h" + +using std::cerr; +using std::endl; + +char *sourceFileName; +char *attrKey; +char *attrValue; +int curAction; +int curActionTable; +int curTrans; +int curState; +int curCondSpace; +int curStateCond; + +Key readKey( char *td, char **end ); +long readOffsetPtr( char *td, char **end ); +unsigned long readLength( char *td ); + +CodeGenMap codeGenMap; + +%} + +%pure-parser + +%union { + /* General data types. */ + char c; + char *data; + int integer; + AttrList *attrList; + + /* Inline parse tree items. 
*/ + InlineItem *ilitem; + InlineList *illist; +} + +%token TAG_unknown +%token TAG_ragel +%token TAG_ragel_def +%token TAG_host +%token TAG_state_list +%token TAG_state +%token TAG_trans_list +%token TAG_t +%token TAG_machine +%token TAG_start_state +%token TAG_action_list +%token TAG_action_table_list +%token TAG_action +%token TAG_action_table +%token TAG_alphtype +%token TAG_element +%token TAG_getkey +%token TAG_state_actions +%token TAG_entry_points +%token TAG_sub_action +%token TAG_cond_space_list +%token TAG_cond_space +%token TAG_cond_list +%token TAG_c + +/* Inline block tokens. */ +%token TAG_text +%token TAG_goto +%token TAG_call +%token TAG_next +%token TAG_goto_expr +%token TAG_call_expr +%token TAG_next_expr +%token TAG_ret +%token TAG_pchar +%token TAG_char +%token TAG_hold +%token TAG_exec +%token TAG_holdte +%token TAG_execte +%token TAG_curs +%token TAG_targs +%token TAG_entry +%token TAG_data +%token TAG_lm_switch +%token TAG_init_act +%token TAG_set_act +%token TAG_set_tokend +%token TAG_get_tokend +%token TAG_init_tokstart +%token TAG_set_tokstart +%token TAG_write +%token TAG_curstate +%token TAG_access +%token TAG_break +%token TAG_option + +%token <data> XML_Word +%token <data> XML_Literal +%type <attrList> AttributeList + +%type <illist> InlineList +%type <ilitem> InlineItem +%type <illist> LmActionList + +%type <ilitem> TagText +%type <ilitem> TagGoto +%type <ilitem> TagCall +%type <ilitem> TagNext +%type <ilitem> TagGotoExpr +%type <ilitem> TagCallExpr +%type <ilitem> TagNextExpr +%type <ilitem> TagRet +%type <ilitem> TagBreak +%type <ilitem> TagPChar +%type <ilitem> TagChar +%type <ilitem> TagHold +%type <ilitem> TagExec +%type <ilitem> TagHoldTE +%type <ilitem> TagExecTE +%type <ilitem> TagCurs +%type <ilitem> TagTargs +%type <ilitem> TagIlEntry +%type <ilitem> TagLmSwitch +%type <ilitem> TagLmSetActId +%type <ilitem> TagLmGetTokEnd +%type <ilitem> TagLmSetTokEnd +%type <ilitem> TagLmInitTokStart +%type <ilitem> TagLmInitAct +%type 
<ilitem> TagLmSetTokStart +%type <ilitem> TagInlineAction +%type <ilitem> TagSubAction + +%% + +/* Input is any number of input sections. An empty file is accepted. */ +input: + TagRagel | + /* Nothing */ { + /* Assume the frontend died if we get no input. It will emit an error. + * Cause us to return an error code. */ + gblErrorCount += 1; + }; + +TagRagel: + TagRagelHead + HostOrDefList + '<' '/' TAG_ragel '>'; + +TagRagelHead: + '<' TAG_ragel AttributeList '>' { + Attribute *fileNameAttr = $3->find( "filename" ); + if ( fileNameAttr == 0 ) + xml_error(@2) << "tag <ragel> requires a filename attribute" << endl; + else + sourceFileName = fileNameAttr->value; + + Attribute *langAttr = $3->find( "lang" ); + if ( langAttr == 0 ) + xml_error(@2) << "tag <ragel> requires a lang attribute" << endl; + else { + if ( strcmp( langAttr->value, "C" ) == 0 ) { + hostLangType = CCode; + hostLang = &hostLangC; + } + else if ( strcmp( langAttr->value, "D" ) == 0 ) { + hostLangType = DCode; + hostLang = &hostLangD; + } + else if ( strcmp( langAttr->value, "Java" ) == 0 ) { + hostLangType = JavaCode; + hostLang = &hostLangJava; + } + } + + /* Eventually more types will be supported. 
*/ + if ( hostLangType == JavaCode && codeStyle != GenTables ) { + error() << "java: only the table code style -T0 is " + "currently supported" << endl; + } + + openOutput( sourceFileName ); + }; + +AttributeList: + AttributeList Attribute { + $$ = $1; + $$->append( Attribute( attrKey, attrValue ) ); + } | + /* Nothing */ { + $$ = new AttrList; + }; + +Attribute: + XML_Word '=' XML_Literal { + attrKey = $1; + attrValue = $3; + }; + +HostOrDefList: + HostOrDefList HostOrDef | + /* Nothing */; + +HostOrDef: + TagHost | TagRagelDef; + +TagHost: + TagHostHead + '<' '/' TAG_host '>' { + if ( outputFormat == OutCode ) + *outStream << xmlData.data; + }; + +TagHostHead: + '<' TAG_host AttributeList '>' { + Attribute *lineAttr = $3->find( "line" ); + if ( lineAttr == 0 ) + xml_error(@2) << "tag <host> requires a line attribute" << endl; + else { + int line = atoi( lineAttr->value ); + if ( outputFormat == OutCode ) + lineDirective( *outStream, sourceFileName, line ); + } + }; + +TagRagelDef: + RagelDefHead + RagelDefItemList + '<' '/' TAG_ragel_def '>' { + if ( gblErrorCount == 0 ) + cgd->generate(); + }; + +RagelDefHead: + '<' TAG_ragel_def AttributeList '>' { + bool wantComplete = outputFormat != OutGraphvizDot; + + char *fsmName = 0; + Attribute *nameAttr = $3->find( "name" ); + if ( nameAttr != 0 ) { + fsmName = nameAttr->value; + + CodeGenMapEl *mapEl = codeGenMap.find( fsmName ); + if ( mapEl != 0 ) + cgd = mapEl->value; + else { + cgd = new CodeGenData( sourceFileName, fsmName, wantComplete ); + codeGenMap.insert( fsmName, cgd ); + } + } + else { + cgd = new CodeGenData( sourceFileName, fsmName, wantComplete ); + } + + cgd->writeOps = 0; + cgd->writeData = false; + cgd->writeInit = false; + cgd->writeExec = false; + cgd->writeEOF = false; + ::keyOps = &cgd->thisKeyOps; + }; + +RagelDefItemList: + RagelDefItemList RagelDefItem | + /* Nothing */; + +RagelDefItem: + TagAlphType | + TagGetKeyExpr | + TagAccessExpr | + TagCurStateExpr | + TagMachine | + TagWrite; + 
+TagWrite: + '<' TAG_write AttributeList '>' + OptionList + '<' '/' TAG_write '>' { + Attribute *what = $3->find( "what" ); + if ( what == 0 ) { + xml_error(@2) << "tag <write> requires a what attribute" << endl; + } + else { + if ( strcmp( what->value, "data" ) == 0 ) + cgd->writeData = true; + else if ( strcmp( what->value, "init" ) == 0 ) + cgd->writeInit = true; + else if ( strcmp( what->value, "exec" ) == 0 ) + cgd->writeExec = true; + else if ( strcmp( what->value, "eof" ) == 0 ) + cgd->writeEOF = true; + } + }; + +OptionList: + OptionList TagOption | + /* Nothing */; + +TagOption: + '<' TAG_option '>' + '<' '/' TAG_option '>' { + if ( strcmp( xmlData.data, "noend" ) == 0 ) + cgd->writeOps |= WO_NOEND; + else if ( strcmp( xmlData.data, "noerror" ) == 0 ) + cgd->writeOps |= WO_NOERROR; + else if ( strcmp( xmlData.data, "noprefix" ) == 0 ) + cgd->writeOps |= WO_NOPREFIX; + else if ( strcmp( xmlData.data, "nofinal" ) == 0 ) + cgd->writeOps |= WO_NOFF; + else { + warning() << "unrecognized write option" << endl; + } + }; + + +TagAlphType: + '<' TAG_alphtype '>' + '<' '/' TAG_alphtype '>' { + if ( ! 
cgd->setAlphType( xmlData.data ) ) + xml_error(@2) << "tag <alphtype> specifies unknown alphabet type" << endl; + }; + +TagGetKeyExpr: + '<' TAG_getkey '>' + InlineList + '<' '/' TAG_getkey '>' { + cgd->getKeyExpr = $4; + }; + +TagAccessExpr: + '<' TAG_access '>' + InlineList + '<' '/' TAG_access '>' { + cgd->accessExpr = $4; + }; + +TagCurStateExpr: + '<' TAG_curstate '>' + InlineList + '<' '/' TAG_curstate '>' { + cgd->curStateExpr = $4; + }; + +TagMachine: + TagMachineHead + MachineItemList + '<' '/' TAG_machine '>' { + cgd->finishMachine(); + }; + +TagMachineHead: + '<' TAG_machine '>' { + cgd->createMachine(); + }; + +MachineItemList: + MachineItemList MachineItem | + /* Nothing */; + +MachineItem: + TagStartState | + TagEntryPoints | + TagStateList | + TagActionList | + TagActionTableList | + TagCondSpaceList; + +TagStartState: + '<' TAG_start_state '>' + '<' '/' TAG_start_state '>' { + unsigned long startState = strtoul( xmlData.data, 0, 10 ); + cgd->setStartState( startState ); + }; + +TagEntryPoints: + '<' TAG_entry_points AttributeList '>' + EntryPointList + '<' '/' TAG_entry_points '>' { + Attribute *errorAttr = $3->find( "error" ); + if ( errorAttr != 0 ) + cgd->setForcedErrorState(); + }; + +EntryPointList: + EntryPointList TagEntry | + /* Nothing */; + +TagEntry: + '<' TAG_entry AttributeList '>' + '<' '/' TAG_entry '>' { + Attribute *nameAttr = $3->find( "name" ); + if ( nameAttr == 0 ) + xml_error(@2) << "tag <entry_points>::<entry> requires a name attribute" << endl; + else { + char *data = xmlData.data; + unsigned long entry = strtoul( data, &data, 10 ); + cgd->addEntryPoint( nameAttr->value, entry ); + } + }; + +TagStateList: + TagStateListHead + StateList + '<' '/' TAG_state_list '>'; + +TagStateListHead: + '<' TAG_state_list AttributeList '>' { + Attribute *lengthAttr = $3->find( "length" ); + if ( lengthAttr == 0 ) + xml_error(@2) << "tag <state_list> requires a length attribute" << endl; + else { + unsigned long length = strtoul( 
lengthAttr->value, 0, 10 ); + cgd->initStateList( length ); + curState = 0; + } + }; + +StateList: + StateList TagState | + /* Nothing */; + +TagState: + TagStateHead + StateItemList + '<' '/' TAG_state '>' { + curState += 1; + }; + +TagStateHead: + '<' TAG_state AttributeList '>' { + Attribute *lengthAttr = $3->find( "final" ); + if ( lengthAttr != 0 ) + cgd->setFinal( curState ); + }; + +StateItemList: + StateItemList StateItem | + /* Nothing */; + +StateItem: + TagStateActions | + TagStateCondList | + TagTransList; + +TagStateActions: + '<' TAG_state_actions '>' + '<' '/' TAG_state_actions '>' { + char *ad = xmlData.data; + + long toStateAction = readOffsetPtr( ad, &ad ); + long fromStateAction = readOffsetPtr( ad, &ad ); + long eofAction = readOffsetPtr( ad, &ad ); + + cgd->setStateActions( curState, toStateAction, + fromStateAction, eofAction ); + }; + +TagStateCondList: + TagStateCondListHead + StateCondList + '<' '/' TAG_cond_list '>'; + +TagStateCondListHead: + '<' TAG_cond_list AttributeList '>' { + Attribute *lengthAttr = $3->find( "length" ); + if ( lengthAttr == 0 ) + xml_error(@2) << "tag <cond_list> requires a length attribute" << endl; + else { + ulong length = readLength( lengthAttr->value ); + cgd->initStateCondList( curState, length ); + curStateCond = 0; + } + } + +StateCondList: + StateCondList StateCond | + /* Empty */; + +StateCond: + '<' TAG_c '>' + '<' '/' TAG_c '>' { + char *td = xmlData.data; + Key lowKey = readKey( td, &td ); + Key highKey = readKey( td, &td ); + long condId = readOffsetPtr( td, &td ); + cgd->addStateCond( curState, lowKey, highKey, condId ); + } + +TagTransList: + TagTransListHead + TransList + '<' '/' TAG_trans_list '>' { + cgd->finishTransList( curState ); + }; + +TagTransListHead: + '<' TAG_trans_list AttributeList '>' { + Attribute *lengthAttr = $3->find( "length" ); + if ( lengthAttr == 0 ) + xml_error(@2) << "tag <trans_list> requires a length attribute" << endl; + else { + unsigned long length = strtoul( 
lengthAttr->value, 0, 10 ); + cgd->initTransList( curState, length ); + curTrans = 0; + } + }; + +TransList: + TransList TagTrans | + /* Nothing */; + +TagTrans: + '<' TAG_t AttributeList '>' + '<' '/' TAG_t '>' { + char *td = xmlData.data; + Key lowKey = readKey( td, &td ); + Key highKey = readKey( td, &td ); + long targ = readOffsetPtr( td, &td ); + long action = readOffsetPtr( td, &td ); + + cgd->newTrans( curState, curTrans++, lowKey, highKey, targ, action ); + }; + +TagActionList: + TagActionListHead + ActionList + '<' '/' TAG_action_list '>'; + +TagActionListHead: + '<' TAG_action_list AttributeList '>' { + Attribute *lengthAttr = $3->find( "length" ); + if ( lengthAttr == 0 ) + xml_error(@2) << "tag <action_list> requires a length attribute" << endl; + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + cgd->initActionList( length ); + curAction = 0; + } + }; + +ActionList: + ActionList TagAction | + /* Nothing */; + +TagAction: + '<' TAG_action AttributeList '>' + InlineList + '<' '/' TAG_action '>' { + Attribute *lineAttr = $3->find( "line" ); + Attribute *colAttr = $3->find( "col" ); + Attribute *nameAttr = $3->find( "name" ); + if ( lineAttr == 0 || colAttr == 0) + xml_error(@2) << "tag <action> requires a line and col attributes" << endl; + else { + unsigned long line = strtoul( lineAttr->value, 0, 10 ); + unsigned long col = strtoul( colAttr->value, 0, 10 ); + + char *name = 0; + if ( nameAttr != 0 ) + name = nameAttr->value; + + cgd->newAction( curAction++, name, line, col, $5 ); + } + }; + +InlineList: + InlineList InlineItem { + /* Append the item to the list, return the list. */ + $1->append( $2 ); + $$ = $1; + } | + /* Nothing */ { + /* Start with empty list. 
*/ + $$ = new InlineList; + }; + +InlineItem: + TagText | + TagGoto | + TagCall | + TagNext | + TagGotoExpr | + TagCallExpr | + TagNextExpr | + TagRet | + TagBreak | + TagPChar | + TagChar | + TagHold | + TagExec | + TagHoldTE | + TagExecTE | + TagCurs | + TagTargs | + TagIlEntry | + TagLmSwitch | + TagLmSetActId | + TagLmSetTokEnd | + TagLmGetTokEnd | + TagSubAction | + TagLmInitTokStart | + TagLmInitAct | + TagLmSetTokStart; + +TagText: + '<' TAG_text AttributeList '>' + '<' '/' TAG_text '>' { + $$ = new InlineItem( InputLoc(), InlineItem::Text ); + $$->data = strdup(xmlData.data); + }; + +TagGoto: + '<' TAG_goto '>' + '<' '/' TAG_goto '>' { + int targ = strtol( xmlData.data, 0, 10 ); + $$ = new InlineItem( InputLoc(), InlineItem::Goto ); + $$->targId = targ; + }; + +TagCall: + '<' TAG_call '>' + '<' '/' TAG_call '>' { + int targ = strtol( xmlData.data, 0, 10 ); + $$ = new InlineItem( InputLoc(), InlineItem::Call ); + $$->targId = targ; + }; + +TagNext: + '<' TAG_next '>' + '<' '/' TAG_next '>' { + int targ = strtol( xmlData.data, 0, 10 ); + $$ = new InlineItem( InputLoc(), InlineItem::Next ); + $$->targId = targ; + }; + +TagGotoExpr: + '<' TAG_goto_expr '>' + InlineList + '<' '/' TAG_goto_expr '>' { + $$ = new InlineItem( InputLoc(), InlineItem::GotoExpr ); + $$->children = $4; + }; + +TagCallExpr: + '<' TAG_call_expr '>' + InlineList + '<' '/' TAG_call_expr '>' { + $$ = new InlineItem( InputLoc(), InlineItem::CallExpr ); + $$->children = $4; + }; + +TagNextExpr: + '<' TAG_next_expr '>' + InlineList + '<' '/' TAG_next_expr '>' { + $$ = new InlineItem( InputLoc(), InlineItem::NextExpr ); + $$->children = $4; + }; + +TagRet: + '<' TAG_ret '>' + '<' '/' TAG_ret '>' { + $$ = new InlineItem( InputLoc(), InlineItem::Ret ); + }; + +TagPChar: + '<' TAG_pchar '>' + '<' '/' TAG_pchar '>' { + $$ = new InlineItem( InputLoc(), InlineItem::PChar ); + }; + +TagChar: + '<' TAG_char '>' + '<' '/' TAG_char '>' { + $$ = new InlineItem( InputLoc(), InlineItem::Char ); + }; + 
+TagHold: + '<' TAG_hold '>' + '<' '/' TAG_hold '>' { + $$ = new InlineItem( InputLoc(), InlineItem::Hold ); + }; + +TagExec: + '<' TAG_exec '>' + InlineList + '<' '/' TAG_exec '>' { + $$ = new InlineItem( InputLoc(), InlineItem::Exec ); + $$->children = $4; + }; + +TagHoldTE: + '<' TAG_holdte '>' + '<' '/' TAG_holdte '>' { + $$ = new InlineItem( InputLoc(), InlineItem::HoldTE ); + }; + +TagExecTE: + '<' TAG_execte '>' + InlineList + '<' '/' TAG_execte '>' { + $$ = new InlineItem( InputLoc(), InlineItem::ExecTE ); + $$->children = $4; + }; + +TagCurs: + '<' TAG_curs '>' + '<' '/' TAG_curs '>' { + $$ = new InlineItem( InputLoc(), InlineItem::Curs ); + }; + +TagTargs: + '<' TAG_targs '>' + '<' '/' TAG_targs '>' { + $$ = new InlineItem( InputLoc(), InlineItem::Targs ); + }; + +TagIlEntry: + '<' TAG_entry '>' + '<' '/' TAG_entry '>' { + int targ = strtol( xmlData.data, 0, 10 ); + $$ = new InlineItem( InputLoc(), InlineItem::Entry ); + $$->targId = targ; + }; + +TagBreak: + '<' TAG_break '>' + '<' '/' TAG_break '>' { + $$ = new InlineItem( InputLoc(), InlineItem::Break ); + }; + + +TagLmSwitch: + '<' TAG_lm_switch AttributeList '>' + LmActionList + '<' '/' TAG_lm_switch '>' { + bool handlesError = false; + Attribute *handlesErrorAttr = $3->find( "handles_error" ); + if ( handlesErrorAttr != 0 ) + handlesError = true; + + $$ = new InlineItem( InputLoc(), InlineItem::LmSwitch ); + $$->children = $5; + $$->handlesError = handlesError; + }; + +LmActionList: + LmActionList TagInlineAction { + $$ = $1; + $$->append( $2 ); + } | + /* Nothing */ { + $$ = new InlineList; + }; + +TagInlineAction: + '<' TAG_sub_action AttributeList '>' + InlineList + '<' '/' TAG_sub_action '>' { + $$ = new InlineItem( InputLoc(), InlineItem::SubAction ); + $$->children = $5; + + Attribute *idAttr = $3->find( "id" ); + if ( idAttr != 0 ) { + unsigned long id = strtoul( idAttr->value, 0, 10 ); + $$->lmId = id; + } + }; + +TagLmSetActId: + '<' TAG_set_act '>' + '<' '/' TAG_set_act '>' { + $$ = new 
InlineItem( InputLoc(), InlineItem::LmSetActId ); + $$->lmId = strtol( xmlData.data, 0, 10 ); + }; + +TagLmGetTokEnd: + '<' TAG_get_tokend '>' + '<' '/' TAG_get_tokend '>' { + $$ = new InlineItem( InputLoc(), InlineItem::LmGetTokEnd ); + }; + +TagLmSetTokEnd: + '<' TAG_set_tokend '>' + '<' '/' TAG_set_tokend '>' { + $$ = new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ); + $$->offset = strtol( xmlData.data, 0, 10 ); + }; + +TagSubAction: + '<' TAG_sub_action '>' + InlineList + '<' '/' TAG_sub_action '>' { + $$ = new InlineItem( InputLoc(), InlineItem::SubAction ); + $$->children = $4; + }; + +TagLmInitTokStart: + '<' TAG_init_tokstart '>' + '<' '/' TAG_init_tokstart '>' { + $$ = new InlineItem( InputLoc(), InlineItem::LmInitTokStart ); + }; + +TagLmInitAct: + '<' TAG_init_act '>' + '<' '/' TAG_init_act '>' { + $$ = new InlineItem( InputLoc(), InlineItem::LmInitAct ); + }; + +TagLmSetTokStart: + '<' TAG_set_tokstart '>' + '<' '/' TAG_set_tokstart '>' { + $$ = new InlineItem( InputLoc(), InlineItem::LmSetTokStart ); + cgd->hasLongestMatch = true; + }; + +TagActionTableList: + TagActionTableListHead + ActionTableList + '<' '/' TAG_action_table_list '>'; + +TagActionTableListHead: + '<' TAG_action_table_list AttributeList '>' { + Attribute *lengthAttr = $3->find( "length" ); + if ( lengthAttr == 0 ) + xml_error(@2) << "tag <action_table_list> requires a length attribute" << endl; + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + cgd->initActionTableList( length ); + curActionTable = 0; + } + }; + +ActionTableList: + ActionTableList TagActionTable | + /* Nothing */; + +TagActionTable: + '<' TAG_action_table AttributeList '>' + '<' '/' TAG_action_table '>' { + /* Find the length of the action table. */ + Attribute *lengthAttr = $3->find( "length" ); + if ( lengthAttr == 0 ) + xml_error(@2) << "tag <at> requires a length attribute" << endl; + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + + /* Collect the action table. 
*/ + RedAction *redAct = cgd->allActionTables + curActionTable; + redAct->actListId = curActionTable; + redAct->key.setAsNew( length ); + char *ptr = xmlData.data; + int pos = 0; + while ( *ptr != 0 ) { + unsigned long actionId = strtoul( ptr, &ptr, 10 ); + redAct->key[pos].key = 0; + redAct->key[pos].value = cgd->allActions+actionId; + pos += 1; + } + + /* Insert into the action table map. */ + cgd->redFsm->actionMap.insert( redAct ); + } + + curActionTable += 1; + }; + +TagCondSpaceList: + TagCondSpaceListHead + CondSpaceList + '<' '/' TAG_cond_space_list '>'; + +TagCondSpaceListHead: + '<' TAG_cond_space_list AttributeList '>' { + Attribute *lengthAttr = $3->find( "length" ); + if ( lengthAttr == 0 ) + xml_error(@2) << "tag <cond_space_list> requires a length attribute" << endl; + else { + ulong length = readLength( lengthAttr->value ); + cgd->initCondSpaceList( length ); + curCondSpace = 0; + } + }; + +CondSpaceList: + CondSpaceList TagCondSpace | + TagCondSpace; + +TagCondSpace: + '<' TAG_cond_space AttributeList '>' + '<' '/' TAG_cond_space '>' { + Attribute *lengthAttr = $3->find( "length" ); + Attribute *idAttr = $3->find( "id" ); + if ( lengthAttr == 0 ) + xml_error(@2) << "tag <cond_space> requires a length attribute" << endl; + else { + if ( idAttr == 0 ) /* idAttr->value is read below, so the id attribute must be present */ + xml_error(@2) << "tag <cond_space> requires an id attribute" << endl; + else { + unsigned long condSpaceId = strtoul( idAttr->value, 0, 10 ); + ulong length = readLength( lengthAttr->value ); + + char *td = xmlData.data; + Key baseKey = readKey( td, &td ); + + cgd->newCondSpace( curCondSpace, condSpaceId, baseKey ); + for ( ulong a = 0; a < length; a++ ) { + long actionOffset = readOffsetPtr( td, &td ); + cgd->condSpaceItem( curCondSpace, actionOffset ); + } + curCondSpace += 1; + } + } + }; + +%% + +unsigned long readLength( char *td ) +{ + return strtoul( td, 0, 10 ); +} + +Key readKey( char *td, char **end ) +{ + if ( keyOps->isSigned ) + return Key( strtol( td, end, 10 ) ); + else + return 
Key( strtoul( td, end, 10 ) ); +} + +long readOffsetPtr( char *td, char **end ) +{ + while ( *td == ' ' || *td == '\t' ) + td++; + + if ( *td == 'x' ) { + if ( end != 0 ) + *end = td + 1; + return -1; + } + + return strtol( td, end, 10 ); +} + +void yyerror( char *err ) +{ + /* Bison won't give us the location, but in the last call to the scanner we + * saved a pointer to the location variable. Use that instead. */ + error(::yylloc->first_line, ::yylloc->first_column) << err << endl; +} + diff --git a/rlcodegen/xmlscan.lex b/rlcodegen/xmlscan.lex new file mode 100644 index 0000000..4ebd70a --- /dev/null +++ b/rlcodegen/xmlscan.lex @@ -0,0 +1,433 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +%{ + +#define YY_NEVER_INTERACTIVE 1 +//#define WANT_TOKEN_WRITE + +#include <iostream> +#include "vector.h" +#include "rlcodegen.h" +#include "xmlparse.h" +#include "buffer.h" + +using std::cout; +using std::cerr; +using std::endl; + +Buffer tokbuf; +int builtinBrace = 0; +bool inlineWhitespace = true; +bool handlingInclude = false; + +YYSTYPE *yylval; +YYLTYPE *yylloc; + +void garble(); + +void extendToken(); +void extendToken( char *data, int len ); + +int emitToken( int token, char *data, int len ); +int emitNoData( int token ); +int emitTag( char *data, int len, bool isOpen ); +void passThrough( char *data ); +void popInclude(); +void scannerInit(); + +enum InlineBlockType { + CurlyDelimited, + SemiTerminated +} inlineBlockType; + +/* Using a wrapper for the parser, must the lex declaration. */ +#define YY_DECL int rlcodegen_lex() + +class Perfect_Hash +{ +private: + static inline unsigned int hash (const char *str, unsigned int len); + +public: + static struct XMLTagHashPair *in_word_set (const char *str, unsigned int len); +}; + +Vector<bool> shouldEmitXMLData; + +int first_line = 1; +int first_column = 1; +int last_line = 1; +int last_column = 0; + +Buffer xmlData; + +%} + +%x OPEN_TAG +%x CLOSE_TAG1 +%x CLOSE_TAG2 +%x ATTR_LIST +%x ATTR_LITERAL + +WSCHAR [\t\n\v\f\r ] +IDENT [a-zA-Z_][a-zA-Z_0-9\-]* + +%% + + /* Numbers in outter code. 
*/
	/* A bare '<' always opens a tag. A new "do not emit data" flag is
	 * pushed for it; emitTag() may flip the flag once the tag name is known. */
<INITIAL>"<" {
	BEGIN(OPEN_TAG);
	shouldEmitXMLData.prepend( false );
	return emitNoData( *yytext );
}

	/* Plain character data between tags. */
<INITIAL>[^<&]+ {
	if ( shouldEmitXMLData[0] )
		xmlData.append( yytext, yyleng );
	garble();
}

	/* The escaped specials. These must be matched as whole entities and
	 * decoded to the single character they stand for. */
<INITIAL>"&amp;" {
	if ( shouldEmitXMLData[0] )
		xmlData.append( "&", 1 );
	garble();
}
<INITIAL>"&lt;" {
	if ( shouldEmitXMLData[0] )
		xmlData.append( "<", 1 );
	garble();
}
<INITIAL>"&gt;" {
	if ( shouldEmitXMLData[0] )
		xmlData.append( ">", 1 );
	garble();
}

	/*
	 * Tags
	 */

<OPEN_TAG>"/" {
	BEGIN(CLOSE_TAG1);
	xmlData.append(0);
	return emitNoData( *yytext );
}

<OPEN_TAG>{IDENT} {
	BEGIN( ATTR_LIST );
	return emitTag( yytext, yyleng, true );
}

<OPEN_TAG,CLOSE_TAG1>{WSCHAR}+ {
	garble();
}

<CLOSE_TAG1>{IDENT} {
	BEGIN( CLOSE_TAG2 );
	return emitTag( yytext, yyleng, false );
}

<CLOSE_TAG2>">" {
	shouldEmitXMLData.remove( 0 );
	BEGIN(INITIAL);
	return emitNoData( *yytext );
}

<ATTR_LIST>{IDENT} {
	return emitToken( XML_Word, yytext, yyleng );
}

<ATTR_LIST>\" {
	BEGIN(ATTR_LITERAL);
	extendToken();
}
<ATTR_LITERAL>\\.		extendToken( yytext+1, 1 );
<ATTR_LITERAL>\\\n		extendToken( yytext+1, 1 );
<ATTR_LITERAL>[^\\"]+	extendToken( yytext, yyleng );

	/* Terminate a double literal */
<ATTR_LITERAL>\" {
	BEGIN(ATTR_LIST);
	return emitToken( XML_Literal, 0, 0 );
}

<ATTR_LIST>{WSCHAR}+ {
	garble();
}

<ATTR_LIST>">" {
	BEGIN(INITIAL);
	return emitNoData( *yytext );
}

<ATTR_LIST>. {
	return emitNoData( *yytext );
}

%%

/* Write out token data, escaping special characters.
*/ +#ifdef WANT_TOKEN_WRITE +void writeToken( int token, char *data ) +{ + cout << "token id " << token << " at " << id->fileName << ":" << + yylloc->first_line << ":" << yylloc->first_column << "-" << + yylloc->last_line << ":" << yylloc->last_column << " "; + + if ( data != 0 ) { + while ( *data != 0 ) { + switch ( *data ) { + case '\n': cout << "\\n"; break; + case '\t': cout << "\\t"; break; + default: cout << *data; break; + } + data += 1; + } + } + cout << endl; +} +#endif + +/* Caclulate line info from yytext. Called on every pattern match. */ +void updateLineInfo() +{ + /* yytext should always have at least wone char. */ + assert( yytext[0] != 0 ); + + /* Scan through yytext up to the last character. */ + char *p = yytext; + for ( ; p[1] != 0; p++ ) { + if ( p[0] == '\n' ) { + last_line += 1; + last_column = 0; + } + else { + last_column += 1; + } + } + + /* Always consider the last character as not a newline. Newlines at the + * end of a token are as any old character at the end of the line. */ + last_column += 1; + + /* The caller may be about to emit a token, be prepared to pass the line + * info to the parser. */ + yylloc->first_line = first_line; + yylloc->first_column = first_column; + yylloc->last_line = last_line; + yylloc->last_column = last_column; + + /* If the last character was indeed a newline, then wrap ahead now. */ + if ( p[0] == '\n' ) { + last_line += 1; + last_column = 0; + } +} + + +/* Eat up a matched pattern that will not be part of a token. */ +void garble() +{ + /* Update line information from yytext. */ + updateLineInfo(); + + /* The next token starts ahead of the last token. */ + first_line = last_line; + first_column = last_column + 1; +} + +/* Extend a token, but don't add any data to it, more token data expected. */ +void extendToken() +{ + /* Update line information from yytext. */ + updateLineInfo(); +} + +/* Append data to the end of the token. More token data expected. 
*/ +void extendToken( char *data, int len ) +{ + if ( data != 0 && len > 0 ) + tokbuf.append( data, len ); + + /* Update line information from yytext. */ + updateLineInfo(); +} + + +/* Append data to the end of a token and emitToken it to the parser. */ +int emitToken( int token, char *data, int len ) +{ + /* Append the data and null terminate. */ + if ( data != 0 && len > 0 ) + tokbuf.append( data, len ); + tokbuf.append( 0 ); + + /* Duplicate the buffer. */ + yylval->data = new char[tokbuf.length]; + strcpy( yylval->data, tokbuf.data ); + + /* Update line information from yytext. */ + updateLineInfo(); + + /* Write token info. */ +#ifdef WANT_TOKEN_WRITE + writeToken( token, tokbuf.data ); +#endif + + /* Clear out the buffer. */ + tokbuf.clear(); + + /* The next token starts ahead of the last token. */ + first_line = last_line; + first_column = last_column + 1; + + return token; +} + +/* Append data to the end of a token and emitToken it to the parser. */ +int emitTag( char *data, int len, bool isOpen ) +{ + /* Lookup the tag. */ + int token = TAG_unknown; + + XMLTagHashPair *tag = Perfect_Hash::in_word_set( data, len ); + if ( tag != 0 ) + token = tag->id; + + if ( isOpen ) { + switch ( token ) { + case TAG_host: case TAG_t: case TAG_start_state: + case TAG_action_table: + case TAG_alphtype: case TAG_state_actions: + case TAG_entry_points: + case TAG_text: case TAG_goto: + case TAG_call: case TAG_next: + case TAG_set_act: case TAG_set_tokend: + case TAG_entry: case TAG_option: + case TAG_cond_space: case TAG_c: + shouldEmitXMLData[0] = true; + xmlData.clear(); + } + } + + return emitToken( token, data, len ); +} + +/* Emit a token with no data to the parser. */ +int emitNoData( int token ) +{ + /* Return null to the parser. */ + yylval->data = 0; + + /* Update line information from yytext. */ + updateLineInfo(); + + /* Write token info. */ +#ifdef WANT_TOKEN_WRITE + writeToken( token, 0 ); +#endif + + /* Clear out the buffer. 
*/ + tokbuf.clear(); + + /* The next token starts ahead of the last token. */ + first_line = last_line; + first_column = last_column + 1; + + return token; +} + +/* Pass tokens in outter code through to the output. */ +void passThrough( char *data ) +{ + /* If no errors, we are emitting code and we are at the bottom of the + * include stack (the source file listed on the command line) then write + * out the data. */ + if ( gblErrorCount == 0 && outputFormat == OutCode ) + *outStream << data; +} + +/* Init a buffer. */ +Buffer::Buffer() +: + data(0), + length(0), + allocated(0) +{ +} + +/* Empty out a buffer on destruction. */ +Buffer::~Buffer() +{ + empty(); +} + +/* Free the space allocated for the buffer. */ +void Buffer::empty() +{ + if ( data != 0 ) { + free( data ); + + data = 0; + length = 0; + allocated = 0; + } +} + +/* Grow the buffer when to len allocation. */ +void Buffer::upAllocate( int len ) +{ + if ( data == 0 ) + data = (char*) malloc( len ); + else + data = (char*) realloc( data, len ); + allocated = len; +} + +int yywrap() +{ + /* Once processessing of the input is done, signal no more. */ + return 1; +} + +/* Here simply to suppress the unused yyunpt warning. */ +void thisFuncIsNeverCalled() +{ + yyunput(0, 0); +} + +void scannerInit() +{ + /* Set this up in case we are initially given something other + * than an opening tag. */ + shouldEmitXMLData.prepend( false ); +} + +/* Wrapper for the lexer which stores the locations of the value and location + * variables of the parser into globals. The parser is reentrant, however the scanner + * does not need to be, so globals work fine. This saves us passing them around + * all the helper functions. 
*/ +int yylex( YYSTYPE *yylval, YYLTYPE *yylloc ) +{ + ::yylval = yylval; + ::yylloc = yylloc; + return rlcodegen_lex(); +} diff --git a/rlcodegen/xmlscan.rl b/rlcodegen/xmlscan.rl new file mode 100644 index 0000000..926c211 --- /dev/null +++ b/rlcodegen/xmlscan.rl @@ -0,0 +1,333 @@ +/* + * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <iostream> +#include <string.h> +#include "vector.h" +#include "xmlparse.h" +#include "rlcodegen.h" +#include "buffer.h" + +using std::istream; +using std::cout; +using std::endl; + +#define BUFSIZE 4096 + +%%{ + machine Scanner; + write data; +}%% + +class Perfect_Hash +{ +private: + static inline unsigned int hash (const char *str, unsigned int len); + +public: + static struct XMLTagHashPair *in_word_set (const char *str, unsigned int len); +}; + +struct Scanner +{ + Scanner( istream &input ) : + input(input), + curline(1), + curcol(1), + p(0), pe(0), + done(false), + data(0), data_len(0), + value(0) + { + %%{ + machine Scanner; + write init; + }%% + } + + int scan(); + void adjustAttrPointers( int distance ); + + istream &input; + + /* Scanner State. 
*/ + int cs, act, have, curline, curcol; + char *tokstart, *tokend; + char *p, *pe; + int done; + + /* Token data */ + char *data; + int data_len; + int value; + AttrMkList attrMkList; + Buffer buffer; + char *tag_id_start; + int tag_id_len; + int token_col, token_line; + + char buf[BUFSIZE]; +}; + + +#define TK_NO_TOKEN (-1) +#define TK_ERR 1 +#define TK_EOF 2 +#define TK_OpenTag 3 +#define TK_CloseTag 4 + +#define ret_tok( _tok ) token = (_tok); data = tokstart + +void Scanner::adjustAttrPointers( int distance ) +{ + for ( AttrMkList::Iter attr = attrMkList; attr.lte(); attr++ ) { + attr->id -= distance; + attr->value -= distance; + } +} + +int Scanner::scan( ) +{ + int token = TK_NO_TOKEN; + int space, readlen; + char *attr_id_start; + char *attr_value_start; + int attr_id_len; + int attr_value_len; + + attrMkList.empty(); + buffer.clear(); + + while ( 1 ) { + if ( p == pe ) { + //printf("scanner: need more data\n"); + + if ( tokstart == 0 ) + have = 0; + else { + /* There is data that needs to be shifted over. */ + //printf("scanner: buffer broken mid token\n"); + have = pe - tokstart; + memmove( buf, tokstart, have ); + + int distance = tokstart - buf; + tokend -= distance; + tag_id_start -= distance; + attr_id_start -= distance; + attr_value_start -= distance; + adjustAttrPointers( distance ); + tokstart = buf; + } + + p = buf + have; + space = BUFSIZE - have; + + if ( space == 0 ) { + /* We filled up the buffer trying to scan a token. */ + //printf("scanner: out of buffer space, you have a really long tag\n"); + return TK_ERR; + } + + if ( done ) { + //printf("scanner: end of file\n"); + p[0] = 0; + readlen = 1; + } + else { + input.read( p, space ); + readlen = input.gcount(); + if ( input.eof() ) { + //printf("scanner: setting done flag\n"); + done = 1; + } + } + + pe = p + readlen; + } + + /* There is no claim that this is a proper XML parser, but it is good + * enough for our purposes. 
*/ + %%{ + machine Scanner; + + action colup { curcol++; } + action start_tok { token_col = curcol; token_line = curline; } + NL = '\n' @{ curcol = 0; curline++; }; + + WS = [\r\t ] | NL; + id = [_a-zA-Z][_a-zA-Z0-9]*; + literal = '"' ( [^"] | NL )* '"'; + + # Attribute identifiers. + action start_attr_id { attr_id_start = p; } + action leave_attr_id { attr_id_len = p - attr_id_start; } + + attr_id = id >start_attr_id %leave_attr_id; + + # Attribute values + action start_attr_value { attr_value_start = p; } + action leave_attr_value + { + attr_value_len = p - attr_value_start; + + AttrMarker newAttr; + newAttr.id = attr_id_start; + newAttr.idLen = attr_id_len; + newAttr.value = attr_value_start; + newAttr.valueLen = attr_value_len; + attrMkList.append( newAttr ); + } + + attr_value = literal >start_attr_value %leave_attr_value; + + # Attribute list. + attribute = attr_id WS* '=' WS* attr_value WS*; + + # Tag identifiers. + action tag_id_start { tag_id_start = p; } + action leave_tag_id { tag_id_len = p - tag_id_start; } + + tag_id = id >tag_id_start %leave_tag_id; + + main := |* + # Tags + ( '<' WS* tag_id ( WS+ attribute* )? '>' ) >start_tok $colup + => { ret_tok( TK_OpenTag ); fbreak; }; + + ( '<' WS* '/' WS* tag_id WS* '>' ) >start_tok $colup + => { ret_tok( TK_CloseTag ); fbreak; }; + + # Data in between tags. + ( [^<&\0] | NL ) $colup + => { buffer.append( *p ); }; + + # Specials. + "&" $colup + => { buffer.append( '&' ); }; + "<" $colup + => { buffer.append( '<' ); }; + ">" $colup + => { buffer.append( '>' ); }; + + # EOF + 0 >start_tok => { ret_tok( TK_EOF ); fbreak; }; + + *|; + + write exec; + }%% + + if ( cs == Scanner_error ) + return TK_ERR; + + if ( token != TK_NO_TOKEN ) { + /* fbreak does not advance p, so we do it manually. 
*/ + p = p + 1; + data_len = p - data; + return token; + } + } +} + + +int xml_parse( istream &input, char *fileName ) +{ + Scanner scanner( input ); + Parser parser( fileName ); + + parser.init(); + + while ( 1 ) { + int token = scanner.scan(); + if ( token == TK_EOF ) { + //cout << "parser_driver: EOF" << endl; + parser.token( _eof ); + break; + } + else if ( token == TK_ERR ) { + //cout << "parser_driver: ERR" << endl; + break; + } + else { + /* All other tokens are either open or close tags. */ + XMLTagHashPair *tagId = Perfect_Hash::in_word_set( + scanner.tag_id_start, scanner.tag_id_len ); + + XMLTag *tag = new XMLTag( tagId, token == TK_OpenTag ? + XMLTag::Open : XMLTag::Close ); + + if ( tagId != 0 ) { + /* Get attributes for open tags. */ + if ( token == TK_OpenTag && scanner.attrMkList.length() > 0 ) { + tag->attrList = new AttrList; + for ( AttrMkList::Iter attr = scanner.attrMkList; + attr.lte(); attr++ ) + { + Attribute newAttr; + newAttr.id = new char[attr->idLen+1]; + memcpy( newAttr.id, attr->id, attr->idLen ); + newAttr.id[attr->idLen] = 0; + + /* Exclude the surrounding quotes. */ + newAttr.value = new char[attr->valueLen-1]; + memcpy( newAttr.value, attr->value+1, attr->valueLen-2 ); + newAttr.value[attr->valueLen-2] = 0; + + tag->attrList->append( newAttr ); + } + } + + /* Get content for closing tags. */ + if ( token == TK_CloseTag ) { + switch ( tagId->id ) { + case TAG_host: case TAG_option: + case TAG_t: case TAG_alphtype: + case TAG_text: case TAG_goto: + case TAG_call: case TAG_next: + case TAG_entry: case TAG_set_tokend: + case TAG_set_act: case TAG_start_state: + case TAG_state_actions: case TAG_action_table: + case TAG_cond_space: case TAG_c: + tag->content = new char[scanner.buffer.length+1]; + memcpy( tag->content, scanner.buffer.data, + scanner.buffer.length ); + tag->content[scanner.buffer.length] = 0; + break; + } + } + } + + #if 0 + cout << "parser_driver: " << (tag->type == XMLTag::Open ? 
"open" : "close") << + ": " << tag->tagId->name << endl; + if ( tag->attrList != 0 ) { + for ( AttrList::Iter attr = *tag->attrList; attr.lte(); attr++ ) + cout << " " << attr->id << ": " << attr->value << endl; + } + if ( tag->content != 0 ) + cout << " content: " << tag->content << endl; + #endif + + parser.token( tag, scanner.token_col, scanner.token_line ); + } + } + + return 0; +} diff --git a/rlcodegen/xmltags.gperf b/rlcodegen/xmltags.gperf new file mode 100644 index 0000000..2554e24 --- /dev/null +++ b/rlcodegen/xmltags.gperf @@ -0,0 +1,81 @@ +/* + * Copyright 2005 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +%{ +#include <string.h> +#include "xmlparse.h" +%} +%compare-strncmp +struct XMLTagHashPair; +%% +ragel, TAG_ragel +ragel_def, TAG_ragel_def +host, TAG_host +state_list, TAG_state_list +state, TAG_state +trans_list, TAG_trans_list +t, TAG_t +machine, TAG_machine +start_state, TAG_start_state +action_list, TAG_action_list +action, TAG_action +action_table_list, TAG_action_table_list +action_table, TAG_action_table +alphtype, TAG_alphtype +getkey, TAG_getkey +state_actions, TAG_state_actions +entry_points, TAG_entry_points +text, TAG_text +goto, TAG_goto +call, TAG_call +next, TAG_next +goto_expr, TAG_goto_expr +call_expr, TAG_call_expr +next_expr, TAG_next_expr +ret, TAG_ret +pchar, TAG_pchar +char, TAG_char +hold, TAG_hold +exec, TAG_exec +holdte, TAG_holdte +execte, TAG_execte +curs, TAG_curs +targs, TAG_targs +entry, TAG_entry +data, TAG_data +lm_switch, TAG_lm_switch +sub_action, TAG_sub_action +init_act, TAG_init_act +set_act, TAG_set_act +get_tokend, TAG_get_tokend +set_tokend, TAG_set_tokend +init_tokstart, TAG_init_tokstart +set_tokstart, TAG_set_tokstart +write, TAG_write +curstate, TAG_curstate +access, TAG_access +break, TAG_break +option, TAG_option +cond_space_list, TAG_cond_space_list +cond_space, TAG_cond_space +cond_list, TAG_cond_list +c, TAG_c diff --git a/test/Makefile.in b/test/Makefile.in new file mode 100644 index 0000000..4dafdc0 --- /dev/null +++ b/test/Makefile.in @@ -0,0 +1,30 @@ +# +# Copyright 2002-2006 Adrian Thurston <thurston@cs.queensu.ca> +# + +# This file is part of Ragel. +# +# Ragel is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# Ragel is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ragel; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +test: + @./runtests + +clean: + rm -f *.c *.cpp *.m *.d *.java *.bin *.class *.exp \ + *.out *_c.rl *_d.rl *_java.rl + +distclean: clean + rm -f Makefile diff --git a/test/README b/test/README new file mode 100644 index 0000000..beb0c03 --- /dev/null +++ b/test/README @@ -0,0 +1,13 @@ + +The test suite now depends on TXL. Since the trend in Ragel is towards +independence of the host-language, tests are now being written in a fictional +mini-language designed for the purpose of testing ragel. The host language +test-cases are then generated using a TXL transformation. This allows one test +case to be run against all host languages in addition to all code generation +styles. + +TXL is not open source, but a free download is available from the homepage. + +http://www.txl.ca/ + +-Adrian diff --git a/test/atoi1.rl b/test/atoi1.rl new file mode 100644 index 0000000..60fc959 --- /dev/null +++ b/test/atoi1.rl @@ -0,0 +1,69 @@ +/* + * @LANG: indep + */ +bool neg; +int val; +%% +val = 0; +neg = false; +%%{ + machine AtoI; + + action begin { + neg = false; + val = 0; + } + + action see_neg { + neg = true; + } + + action add_digit { + val = val * 10 + (fc - '0'); + } + + action finish { + if ( neg ) { + val = -1 * val; + } + } + action print { + printi val; + prints "\n"; + } + + atoi = ( + ('-'@see_neg | '+')? 
(digit @add_digit)+ + ) >begin %finish; + + main := atoi '\n' @print; +}%% +/* _____INPUT_____ +"1\n" +"12\n" +"222222\n" +"+2123\n" +"213 3213\n" +"-12321\n" +"--123\n" +"-99\n" +" -3000\n" +_____INPUT_____ */ + +/* _____OUTPUT_____ +1 +ACCEPT +12 +ACCEPT +222222 +ACCEPT +2123 +ACCEPT +FAIL +-12321 +ACCEPT +FAIL +-99 +ACCEPT +FAIL +_____OUTPUT_____ */ diff --git a/test/atoi2.rl b/test/atoi2.rl new file mode 100644 index 0000000..e16380a --- /dev/null +++ b/test/atoi2.rl @@ -0,0 +1,81 @@ +/* + * @LANG: indep + * This implementes an atoi machine using the statechart paradigm. + */ +bool neg; +int val; +%% +val = 0; +neg = false; +%%{ + machine StateChart; + + action begin { + neg = false; + val = 0; + } + + action see_neg { + neg = true; + } + + action add_digit { + val = val * 10 + (fc - '0'); + } + + action finish { + if ( neg ) + val = -1 * val; + } + + atoi = ( + start: ( + '-' @see_neg ->om_num | + '+' ->om_num | + [0-9] @add_digit ->more_nums + ), + + # One or more nums. + om_num: ( + [0-9] @add_digit ->more_nums + ), + + # Zero ore more nums. + more_nums: ( + [0-9] @add_digit ->more_nums | + '' -> final + ) + ) >begin %finish; + + action oneof { printi val; prints "\n"; } + main := ( atoi '\n' @oneof )*; +}%% +/* _____INPUT_____ +"1\n" +"12\n" +"222222\n" +"+2123\n" +"213 3213\n" +"-12321\n" +"--123\n" +"-99\n" +" -3000\n" +_____INPUT_____ */ + +/* _____OUTPUT_____ +1 +ACCEPT +12 +ACCEPT +222222 +ACCEPT +2123 +ACCEPT +FAIL +-12321 +ACCEPT +FAIL +-99 +ACCEPT +FAIL +_____OUTPUT_____ */ diff --git a/test/awkemu.rl b/test/awkemu.rl new file mode 100644 index 0000000..a988651 --- /dev/null +++ b/test/awkemu.rl @@ -0,0 +1,157 @@ +/* + * @LANG: c + */ + +/* + * Emulate the basic parser of the awk program. Breaks lines up into + * words and prints the words. 
+ */ + +#include <stdio.h> +#include <string.h> + +#define LINEBUF 2048 +static char lineBuf[LINEBUF]; +static char blineBuf[LINEBUF]; +static int lineLen; +static int blineLen; +static int words; + +void finishLine(); + +struct awkemu +{ + int cs; +}; + +%%{ + machine awkemu; + + variable curstate fsm->cs; + + # Starts a line. Will initialize all the data necessary for capturing the line. + action startline { + lineLen = 0; + blineLen = 0; + words = 0; + } + + # Will be executed on every character seen in a word. Captures the word + # to the broken up line buffer. + action wordchar { + blineBuf[blineLen++] = fc; + } + + # Terminate a word. Adds the null after the word and increments the word count + # for the line. + action termword { + blineBuf[blineLen++] = 0; + words += 1; + } + + # Will be executed on every character seen in a line (not including + # the newline itself. + action linechar { + lineBuf[lineLen++] = fc; + } + + # This section of the machine deals with breaking up lines into fields. + # Lines are separed by the whitespace and put in an array of words. + + # Words in a line. + word = (extend - [ \t\n])+; + + # The whitespace separating words in a line. + whitespace = [ \t]; + + # The components in a line to break up. Either a word or a single char of + # whitespace. On the word capture characters. + blineElements = word $wordchar %termword | whitespace; + + # Star the break line elements. Just be careful to decrement the leaving + # priority as we don't want multiple character identifiers to be treated as + # multiple single char identifiers. + breakLine = ( blineElements $1 %0 )* . '\n'; + + # This machine lets us capture entire lines. We do it separate from the words + # in a line. + bufLine = (extend - '\n')* $linechar %{ finishLine(); } . '\n'; + + # A line can then consist of the machine that will break up the line into + # words and a machine that will buffer the entire line. 
+ line = ( breakLine | bufLine ) > startline; + + # Any number of lines. + main := line*; +}%% + +void finishLine() +{ + int i; + char *pword = blineBuf; + lineBuf[lineLen] = 0; + printf("endline(%i): %s\n", words, lineBuf ); + for ( i = 0; i < words; i++ ) { + printf(" word: %s\n", pword ); + pword += strlen(pword) + 1; + } +} + +%% write data; + +void awkemu_init( struct awkemu *fsm ) +{ + %% write init; +} + +void awkemu_execute( struct awkemu *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + %% write exec; +} + +int awkemu_finish( struct awkemu *fsm ) +{ + %% write eof; + + if ( fsm->cs == awkemu_error ) + return -1; + if ( fsm->cs >= awkemu_first_final ) + return 1; + return 0; +} + +#include <stdio.h> +#define BUFSIZE 2048 + +struct awkemu fsm; +char buf[BUFSIZE]; + +void test( char *buf ) +{ + int len = strlen( buf ); + awkemu_init( &fsm ); + awkemu_execute( &fsm, buf, len ); + if ( awkemu_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +int main() +{ + test( "" ); + test( "one line with no newline" ); + test( "one line\n" ); + return 0; +} + +#ifdef _____OUTPUT_____ +ACCEPT +FAIL +endline(2): one line + word: one + word: line +ACCEPT +#endif diff --git a/test/builtin.rl b/test/builtin.rl new file mode 100644 index 0000000..626927b --- /dev/null +++ b/test/builtin.rl @@ -0,0 +1,1209 @@ +/* + * @LANG: c + */ + +#include <stdio.h> + +void alph(const char *type) +{ + printf("%s\n", type); +} + +struct builtin +{ + int cs; +}; + +%%{ + machine builtin; + alphtype unsigned int; + variable curstate fsm->cs; + + main := ( + any @{alph("any");} | + ascii @{alph("ascii");} | + extend @{alph("extend");} | + alpha @{alph("alpha");} | + digit @{alph("digit");} | + alnum @{alph("alnum");} | + lower @{alph("lower");} | + upper @{alph("upper");} | + cntrl @{alph("cntrl");} | + graph @{alph("graph");} | + print @{alph("print");} | + punct @{alph("punct");} | + space @{alph("space");} | + xdigit 
@{alph("xdigit");} + )*; +}%% + +%% write data; + +void builtin_init( struct builtin *fsm ) +{ + %% write init; +} + +void builtin_execute( struct builtin *fsm, const unsigned int *data, int len ) +{ + const unsigned int *p = data; + const unsigned int *pe = data+len; + %% write exec; +} + +int builtin_finish( struct builtin *fsm ) +{ + if ( fsm->cs == builtin_error ) + return -1; + else if ( fsm->cs >= builtin_first_final ) + return 1; + return 0; +} + +#include <stdio.h> +#define BUFSIZE 2048 + +struct builtin fsm; +char buf[BUFSIZE]; +unsigned int i; + +int test( const unsigned int *data, int len ) +{ + builtin_init( &fsm ); + builtin_execute( &fsm, data, len ); + if ( builtin_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); + return 0; +} + +#define DLEN 258 +unsigned int data[DLEN] = { + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, + 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, + 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, + 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, + 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, + 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, + 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, + 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, + 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, + 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, + 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, + 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, + 247, 248, 249, 
250, 251, 252, 253, 254, 255, 256 +}; + +int main() +{ + test( data, DLEN ); + return 0; +} + +#ifdef _____OUTPUT_____ +any +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +space +any +ascii +extend +cntrl +space +any +ascii +extend +cntrl +space +any +ascii +extend +cntrl +space +any +ascii +extend +cntrl +space +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +print +space +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +digit +alnum +graph +print +xdigit +any +ascii +extend +digit +alnum +graph +print +xdigit +any +ascii +extend +digit +alnum +graph +print +xdigit +any +ascii +extend +digit +alnum +graph +print +xdigit +any +ascii +extend +digit +alnum +graph +print +xdigit +any +ascii +extend +digit +alnum +graph +print 
+xdigit +any +ascii +extend +digit +alnum +graph +print +xdigit +any +ascii +extend +digit +alnum +graph +print +xdigit +any +ascii +extend +digit +alnum +graph +print +xdigit +any +ascii +extend +digit +alnum +graph +print +xdigit +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +alpha +alnum +upper +graph +print +xdigit +any +ascii +extend +alpha +alnum +upper +graph +print +xdigit +any +ascii +extend +alpha +alnum +upper +graph +print +xdigit +any +ascii +extend +alpha +alnum +upper +graph +print +xdigit +any +ascii +extend +alpha +alnum +upper +graph +print +xdigit +any +ascii +extend +alpha +alnum +upper +graph +print +xdigit +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print 
+any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +alpha +alnum +lower +graph +print +xdigit +any +ascii +extend +alpha +alnum +lower +graph +print +xdigit +any +ascii +extend +alpha +alnum +lower +graph +print +xdigit +any +ascii +extend +alpha +alnum +lower +graph +print +xdigit +any +ascii +extend +alpha +alnum +lower +graph +print +xdigit +any +ascii +extend +alpha +alnum +lower +graph +print +xdigit +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +cntrl +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any 
+extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +ACCEPT +#endif diff --git a/test/call1.rl b/test/call1.rl new file mode 100644 index 0000000..24a362f --- /dev/null +++ b/test/call1.rl @@ -0,0 +1,103 @@ +/* + * @LANG: c + */ + +#include <stdio.h> +#include <string.h> + +int num = 0; + +struct test +{ + int cs, top, stack[32]; +}; + +%%{ + machine test; + access fsm->; + + action check_num { + if ( num & 1 ) + fcall *fentry(odd); + else + fcall even; + } + + 
# Test call and return functionality. + even := 'even' any @{fhold; fret;}; + odd := 'odd' any @{fhold; fret;}; + num = [0-9]+ ${ num = num * 10 + (fc - '0'); }; + even_odd = num ' ' @check_num "\n"; + + # Test calls in out actions. + fail := !(any*); + out_acts = 'OA ok\n' | + 'OA error1\n' | + 'OA error2\n'; + + main := even_odd | out_acts; +}%% + +%% write data; + +void test_init( struct test *fsm ) +{ + num = 0; + %% write init; +} + +void test_execute( struct test *fsm, const char *data, int len ) +{ + const char *p = data; + const char *pe = data+len; + + %% write exec; +} + +int test_finish( struct test *fsm ) +{ + %% write eof; + + if ( fsm->cs == test_error ) + return -1; + if ( fsm->cs >= test_first_final ) + return 1; + return 0; +} + +#define BUFSIZE 1024 + +void test( char *buf ) +{ + struct test test; + test_init( &test ); + test_execute( &test, buf, strlen(buf) ); + if ( test_finish( &test ) > 0 ) + printf( "ACCEPT\n" ); + else + printf( "FAIL\n" ); +} + +int main() +{ + test( "78 even\n" ); + test( "89 odd\n" ); + test( "1 even\n" ); + test( "0 odd\n" ); + test( "OA ok\n" ); + test( "OA error1\n" ); + test( "OA error2\n" ); + + return 0; +} + + +#ifdef _____OUTPUT_____ +ACCEPT +ACCEPT +FAIL +FAIL +ACCEPT +ACCEPT +ACCEPT +#endif diff --git a/test/call2.rl b/test/call2.rl new file mode 100644 index 0000000..c447496 --- /dev/null +++ b/test/call2.rl @@ -0,0 +1,118 @@ +/* + * @LANG: c++ + */ + +#include <stdio.h> +#include <string.h> + +int num = 0; + +struct CallTest +{ + int cs, top, stack[32]; + + // Initialize the machine. Invokes any init statement blocks. Returns 0 + // if the machine begins in a non-accepting state and 1 if the machine + // begins in an accepting state. + void init( ); + + // Execute the machine on a block of data. Returns -1 if after processing + // the data, the machine is in the error state and can never accept, 0 if + // the machine is in a non-accepting state and 1 if the machine is in an + // accepting state. 
+ void execute( const char *data, int len ); + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ); +}; + +%%{ + machine CallTest; + + action check_num { + if ( num & 1 ) + fcall *fentry(odd); + else + fcall even; + } + + # Test call and return functionality. + even := 'even' any @{fhold; fret;}; + odd := 'odd' any @{fhold; fret;}; + num = [0-9]+ ${ num = num * 10 + (fc - '0'); }; + even_odd = num ' ' @check_num "\n"; + + # Test calls in out actions. + fail := !(any*); + out_acts = 'OA ok\n' | + 'OA error1\n' | + 'OA error2\n'; + + main := even_odd | out_acts; +}%% + +%% write data; + +void CallTest::init( ) +{ + num = 0; + %% write init; +} + +void CallTest::execute( const char *data, int len ) +{ + const char *p = data; + const char *pe = data+len; + + %% write exec; +} + +int CallTest::finish( ) +{ + %% write eof; + + if ( this->cs == CallTest_error ) + return -1; + if ( this->cs >= CallTest_first_final ) + return 1; + return 0; +} + +#define BUFSIZE 1024 + +void test( char *buf ) +{ + CallTest test; + + test.init(); + test.execute( buf, strlen(buf) ); + if ( test.finish() > 0 ) + printf( "ACCEPT\n" ); + else + printf( "FAIL\n" ); +} + +int main() +{ + test( "78 even\n" ); + test( "89 odd\n" ); + test( "1 even\n" ); + test( "0 odd\n" ); + test( "OA ok\n" ); + test( "OA error1\n" ); + test( "OA error2\n" ); + return 0; +} + +#ifdef _____OUTPUT_____ +ACCEPT +ACCEPT +FAIL +FAIL +ACCEPT +ACCEPT +ACCEPT +#endif diff --git a/test/call3.rl b/test/call3.rl new file mode 100644 index 0000000..c253b24 --- /dev/null +++ b/test/call3.rl @@ -0,0 +1,123 @@ +/* + * @LANG: obj-c + */ + +#include <stdio.h> +#include <string.h> +#include <objc/Object.h> + + +int num = 0; + +@interface CallTest : Object +{ +@public + /* State machine operation data. 
*/ + int cs, top, stack[32]; +}; + +// Initialize the machine. Invokes any init statement blocks. Returns 0 +// if the machine begins in a non-accepting state and 1 if the machine +// begins in an accepting state. +- (void) initFsm; + +// Execute the machine on a block of data. Returns -1 if after processing +// the data, the machine is in the error state and can never accept, 0 if +// the machine is in a non-accepting state and 1 if the machine is in an +// accepting state. +- (void) executeWithData:(const char *)data len:(int)len; + +// Indicate that there is no more data. Returns -1 if the machine finishes +// in the error state and does not accept, 0 if the machine finishes +// in any other non-accepting state and 1 if the machine finishes in an +// accepting state. +- (int) finish; + +@end + +@implementation CallTest + +%%{ + machine CallTest; + + action check_num { + if ( num & 1 ) + fcall odd; + else + fcall even; + } + + # Test call and return functionality. + even := 'even' any @{fhold; fret;}; + odd := 'odd' any @{fhold; fret;}; + num = [0-9]+ ${ num = num * 10 + (fc - '0'); }; + even_odd = num ' ' @check_num "\n"; + + # Test calls in out actions. + fail := !(any*); + out_acts = 'OA ok\n' | + 'OA error1\n' | + 'OA error2\n'; + + main := even_odd | out_acts; +}%% + +%% write data; + +- (void) initFsm; +{ + num = 0; + %% write init; +} + +- (void) executeWithData:(const char *)data len:(int)len; +{ + const char *p = data; + const char *pe = data + len; + %% write exec; +} + +- (int) finish; +{ + %% write eof; + if ( cs == CallTest_error ) + return -1; + return ( cs >= CallTest_first_final ) ? 
1 : 0; +} + +@end + +#define BUFSIZE 1024 + +void test( char *buf ) +{ + CallTest *test = [[CallTest alloc] init]; + [test initFsm]; + [test executeWithData:buf len:strlen(buf)]; + if ( [test finish] > 0 ) + printf( "ACCEPT\n" ); + else + printf( "FAIL\n" ); +} + +int main() +{ + test( "78 even\n" ); + test( "89 odd\n" ); + test( "1 even\n" ); + test( "0 odd\n" ); + test( "OA ok\n" ); + test( "OA error1\n" ); + test( "OA error2\n" ); + return 0; +} + +#ifdef _____OUTPUT_____ +ACCEPT +ACCEPT +FAIL +FAIL +ACCEPT +ACCEPT +ACCEPT +#endif diff --git a/test/clang1.rl b/test/clang1.rl new file mode 100644 index 0000000..ade8f9a --- /dev/null +++ b/test/clang1.rl @@ -0,0 +1,283 @@ +/* + * @LANG: c + * A mini C-like language scanner. + */ + +#include <stdio.h> +#include <string.h> +#define IDENT_BUFLEN 256 + +%%{ + machine clang; + + # Function to buffer a character. + action bufChar { + if ( identLen < IDENT_BUFLEN ) { + identBuf[identLen] = fc; + identLen += 1; + } + } + + # Function to clear the buffer. + action clearBuf { + identLen = 0; + } + + # Functions to dump tokens as they are matched. + action ident { + identBuf[identLen] = 0; + printf("ident(%i): %s\n", curLine, identBuf); + } + action literal { + identBuf[identLen] = 0; + printf("literal(%i): %s\n", curLine, identBuf); + } + action float { + identBuf[identLen] = 0; + printf("float(%i): %s\n", curLine, identBuf); + } + action int { + identBuf[identLen] = 0; + printf("int(%i): %s\n", curLine, identBuf); + } + action hex { + identBuf[identLen] = 0; + printf("hex(%i): 0x%s\n", curLine, identBuf); + } + action symbol { + identBuf[identLen] = 0; + printf("symbol(%i): %s\n", curLine, identBuf); + } + + # Alpha numberic characters or underscore. + alnumu = alnum | '_'; + + # Alpha charactres or underscore. + alphau = alpha | '_'; + + # Symbols. Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving dump the symbol. 
+ symbol = ( punct - [_'"] ) >clearBuf $bufChar %symbol; + + # Identifier. Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving, dump the identifier. + ident = (alphau . alnumu*) >clearBuf $bufChar %ident; + + # Match single characters inside literal strings. Or match + # an escape sequence. Buffers the charater matched. + sliteralChar = + ( extend - ['\\] ) @bufChar | + ( '\\' . extend @bufChar ); + dliteralChar = + ( extend - ["\\] ) @bufChar | + ( '\\' . extend @bufChar ); + + # Single quote and double quota literals. At the start clear + # the buffer. Upon leaving dump the literal. + sliteral = ('\'' @clearBuf . sliteralChar* . '\'' ) %literal; + dliteral = ('"' @clearBuf . dliteralChar* . '"' ) %literal; + literal = sliteral | dliteral; + + # Whitespace is standard ws, newlines and control codes. + whitespace = any - 0x21..0x7e; + + # Describe both c style comments and c++ style comments. The + # priority bump on tne terminator of the comments brings us + # out of the extend* which matches everything. + ccComment = '//' . extend* $0 . '\n' @1; + cComment = '/*' . extend* $0 . '*/' @1; + + # Match an integer. We don't bother clearing the buf or filling it. + # The float machine overlaps with int and it will do it. + int = digit+ %int; + + # Match a float. Upon entering the machine clear the buf, buffer + # characters on every trans and dump the float upon leaving. + float = ( digit+ . '.' . digit+ ) >clearBuf $bufChar %float; + + # Match a hex. Upon entering the hex part, clear the buf, buffer characters + # on every trans and dump the hex on leaving transitions. + hex = '0x' . xdigit+ >clearBuf $bufChar %hex; + + # Or together all the lanuage elements. + fin = ( ccComment | + cComment | + symbol | + ident | + literal | + whitespace | + int | + float | + hex ); + + # Star the language elements. It is critical in this type of application + # that we decrease the priority of out transitions before doing so. 
This + # is so that when we see 'aa' we stay in the fin machine to match an ident + # of length two and not wrap around to the front to match two idents of + # length one. + clang_main = ( fin $1 %0 )*; + + # This machine matches everything, taking note of newlines. + newline = ( any | '\n' @{ curLine += 1; } )*; + + # The final fsm is the lexer intersected with the newline machine which + # will count lines for us. Since the newline machine accepts everything, + # the strings accepted is goverened by the clang_main machine, onto which + # the newline machine overlays line counting. + main := clang_main & newline; +}%% + +#include <stdio.h> + +%% write data noerror; + + +char data[] = + "/*\n" + " * Copyright\n" + " */\n" + "\n" + "/* Aapl.\n" + " */\n" + "\n" + "#define _AAPL_RESIZE_H\n" + "\n" + "#include <assert.h>\n" + "\n" + "#ifdef AAPL_NAMESPACE\n" + "namespace Aapl {\n" + "#endif\n" + "#define LIN_DEFAULT_STEP 256\n" + "#define EXPN_UP( existing, needed ) \\\n" + " need > eng ? (ned<<1) : eing\n" + " \n" + "\n" + "/*@}*/\n" + "#undef EXPN_UP\n" + "#ifdef AAPL_NAMESPACE\n" + "#endif /* _AAPL_RESIZE_H */\n"; + +void test( char *buf ) +{ + int len = strlen( buf ); + char *p = buf, *pe = buf + len; + char identBuf[IDENT_BUFLEN+1]; + int identLen; + int curLine; + int cs; + + identLen = 0; + curLine = 1; + + %% write init; + %% write exec; + %% write eof; + + if ( cs >= clang_first_final ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +int main() +{ + test( + "999 0xaAFF99 99.99 /*\n" + "*/ 'lksdj' //\n" + "\"\n" + "\n" + "literal\n" + "\n" + "\n" + "\"0x00aba foobardd.ddsf 0x0.9\n" ); + test( + "wordwithnum00asdf\n" + "000wordfollowsnum,makes new symbol\n" + "\n" + "finishing early /* unfinished ...\n" ); + test( data ); + return 0; +} + +#ifdef _____OUTPUT_____ +int(1): 999 +hex(1): 0xaAFF99 +float(1): 99.99 +literal(2): lksdj +literal(8): + +literal + + + +hex(8): 0x00aba +ident(8): foobardd +symbol(8): . +ident(8): ddsf +hex(8): 0x0 +symbol(8): . 
+int(8): 9 +ACCEPT +ident(1): wordwithnum00asdf +int(2): 000 +ident(2): wordfollowsnum +symbol(2): , +ident(2): makes +ident(2): new +ident(2): symbol +ident(4): finishing +ident(4): early +FAIL +symbol(8): # +ident(8): define +ident(8): _AAPL_RESIZE_H +symbol(10): # +ident(10): include +symbol(10): < +ident(10): assert +symbol(10): . +ident(10): h +symbol(10): > +symbol(12): # +ident(12): ifdef +ident(12): AAPL_NAMESPACE +ident(13): namespace +ident(13): Aapl +symbol(13): { +symbol(14): # +ident(14): endif +symbol(15): # +ident(15): define +ident(15): LIN_DEFAULT_STEP +int(15): 256 +symbol(16): # +ident(16): define +ident(16): EXPN_UP +symbol(16): ( +ident(16): existing +symbol(16): , +ident(16): needed +symbol(16): ) +symbol(16): \ +ident(17): need +symbol(17): > +ident(17): eng +symbol(17): ? +symbol(17): ( +ident(17): ned +symbol(17): < +symbol(17): < +int(17): 1 +symbol(17): ) +symbol(17): : +ident(17): eing +symbol(21): # +ident(21): undef +ident(21): EXPN_UP +symbol(22): # +ident(22): ifdef +ident(22): AAPL_NAMESPACE +symbol(23): # +ident(23): endif +ACCEPT +#endif diff --git a/test/clang2.rl b/test/clang2.rl new file mode 100644 index 0000000..fcb6ba8 --- /dev/null +++ b/test/clang2.rl @@ -0,0 +1,324 @@ +/* + * @LANG: obj-c + * A mini C-like language scanner. + */ + +#include <stdio.h> +#include <objc/Object.h> +#include <string.h> + +#define IDENT_BUFLEN 256 + +@interface Clang : Object +{ +@public + /* State machine operation data. */ + int cs; + + /* Parsing data. */ + char identBuf[IDENT_BUFLEN+1]; + int identLen; + int curLine; +}; + +- (void) initFsm; +- (void) executeWithData:(const char *)data len:(int)len; +- (int) finish; + +@end + +%%{ + machine Clang; + + # Function to buffer a character. + action bufChar { + if ( identLen < IDENT_BUFLEN ) { + identBuf[identLen] = fc; + identLen += 1; + } + } + + # Function to clear the buffer. + action clearBuf { + identLen = 0; + } + + # Functions to dump tokens as they are matched. 
+ action ident { + identBuf[identLen] = 0; + printf("ident(%i): %s\n", curLine, identBuf); + } + action literal { + identBuf[identLen] = 0; + printf("literal(%i): %s\n", curLine, identBuf); + } + action float { + identBuf[identLen] = 0; + printf("float(%i): %s\n", curLine, identBuf); + } + action int { + identBuf[identLen] = 0; + printf("int(%i): %s\n", curLine, identBuf); + } + action hex { + identBuf[identLen] = 0; + printf("hex(%i): 0x%s\n", curLine, identBuf); + } + action symbol { + identBuf[identLen] = 0; + printf("symbol(%i): %s\n", curLine, identBuf); + } + + # Alpha numberic characters or underscore. + alnumu = alnum | '_'; + + # Alpha charactres or underscore. + alphau = alpha | '_'; + + # Symbols. Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving dump the symbol. + symbol = ( punct - [_'"] ) >clearBuf $bufChar %symbol; + + # Identifier. Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving, dump the identifier. + ident = (alphau . alnumu*) >clearBuf $bufChar %ident; + + # Match single characters inside literal strings. Or match + # an escape sequence. Buffers the charater matched. + sliteralChar = + ( extend - ['\\] ) @bufChar | + ( '\\' . extend @bufChar ); + dliteralChar = + ( extend - ["\\] ) @bufChar | + ( '\\' . extend @bufChar ); + + # Single quote and double quota literals. At the start clear + # the buffer. Upon leaving dump the literal. + sliteral = ('\'' @clearBuf . sliteralChar* . '\'' ) %literal; + dliteral = ('"' @clearBuf . dliteralChar* . '"' ) %literal; + literal = sliteral | dliteral; + + # Whitespace is standard ws, newlines and control codes. + whitespace = any - 0x21..0x7e; + + # Describe both c style comments and c++ style comments. The + # priority bump on tne terminator of the comments brings us + # out of the extend* which matches everything. + ccComment = '//' . extend* $0 . '\n' @1; + cComment = '/*' . extend* $0 . '*/' @1; + + # Match an integer. 
We don't bother clearing the buf or filling it. + # The float machine overlaps with int and it will do it. + int = digit+ %int; + + # Match a float. Upon entering the machine clear the buf, buffer + # characters on every trans and dump the float upon leaving. + float = ( digit+ . '.' . digit+ ) >clearBuf $bufChar %float; + + # Match a hex. Upon entering the hex part, clear the buf, buffer characters + # on every trans and dump the hex on leaving transitions. + hex = '0x' . xdigit+ >clearBuf $bufChar %hex; + + # Or together all the lanuage elements. + fin = ( ccComment | + cComment | + symbol | + ident | + literal | + whitespace | + int | + float | + hex ); + + # Star the language elements. It is critical in this type of application + # that we decrease the priority of out transitions before doing so. This + # is so that when we see 'aa' we stay in the fin machine to match an ident + # of length two and not wrap around to the front to match two idents of + # length one. + clang_main = ( fin $1 %0 )*; + + # This machine matches everything, taking note of newlines. + newline = ( any | '\n' @{ curLine += 1; } )*; + + # The final fsm is the lexer intersected with the newline machine which + # will count lines for us. Since the newline machine accepts everything, + # the strings accepted is goverened by the clang_main machine, onto which + # the newline machine overlays line counting. 
+ main := clang_main & newline; +}%% + +@implementation Clang + +%% write data; + +- (void) initFsm; +{ + identLen = 0; + curLine = 1; + %% write init; +} + +- (void) executeWithData:(const char *)data len:(int)len; +{ + const char *p = data; + const char *pe = data + len; + + %% write exec; +} + +- (int) finish; +{ + %% write eof; + if ( cs == Clang_error ) + return -1; + if ( cs >= Clang_first_final ) + return 1; + return 0; +} + +@end + +#define BUFSIZE 2048 + +Clang *fsm; +char buf[BUFSIZE]; + +void test( char *buf ) +{ + int len = strlen(buf); + fsm = [[Clang alloc] init]; + [fsm initFsm]; + [fsm executeWithData:buf len:len]; + if ( [fsm finish] > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +int main() +{ + test( + "999 0xaAFF99 99.99 /*\n" + "*/ 'lksdj' //\n" + "\"\n" + "\n" + "literal\n" + "\n" + "\n" + "\"0x00aba foobardd.ddsf 0x0.9\n" ); + + test( + "wordwithnum00asdf\n" + "000wordfollowsnum,makes new symbol\n" + "\n" + "finishing early /* unfinished ...\n" ); + + test( + "/*\n" + " * Copyright\n" + " */\n" + "\n" + "/* Aapl.\n" + " */\n" + "\n" + "#define _AAPL_RESIZE_H\n" + "\n" + "#include <assert.h>\n" + "\n" + "#ifdef AAPL_NAMESPACE\n" + "namespace Aapl {\n" + "#endif\n" + "#define LIN_DEFAULT_STEP 256\n" + "#define EXPN_UP( existing, needed ) \\\n" + " need > eng ? (ned<<1) : eing\n" + " \n" + "\n" + "/*@}*/\n" + "#undef EXPN_UP\n" + "#ifdef AAPL_NAMESPACE\n" + "#endif /* _AAPL_RESIZE_H */\n" ); + return 0; +} + +#ifdef _____OUTPUT_____ +int(1): 999 +hex(1): 0xaAFF99 +float(1): 99.99 +literal(2): lksdj +literal(8): + +literal + + + +hex(8): 0x00aba +ident(8): foobardd +symbol(8): . +ident(8): ddsf +hex(8): 0x0 +symbol(8): . 
+int(8): 9 +ACCEPT +ident(1): wordwithnum00asdf +int(2): 000 +ident(2): wordfollowsnum +symbol(2): , +ident(2): makes +ident(2): new +ident(2): symbol +ident(4): finishing +ident(4): early +FAIL +symbol(8): # +ident(8): define +ident(8): _AAPL_RESIZE_H +symbol(10): # +ident(10): include +symbol(10): < +ident(10): assert +symbol(10): . +ident(10): h +symbol(10): > +symbol(12): # +ident(12): ifdef +ident(12): AAPL_NAMESPACE +ident(13): namespace +ident(13): Aapl +symbol(13): { +symbol(14): # +ident(14): endif +symbol(15): # +ident(15): define +ident(15): LIN_DEFAULT_STEP +int(15): 256 +symbol(16): # +ident(16): define +ident(16): EXPN_UP +symbol(16): ( +ident(16): existing +symbol(16): , +ident(16): needed +symbol(16): ) +symbol(16): \ +ident(17): need +symbol(17): > +ident(17): eng +symbol(17): ? +symbol(17): ( +ident(17): ned +symbol(17): < +symbol(17): < +int(17): 1 +symbol(17): ) +symbol(17): : +ident(17): eing +symbol(21): # +ident(21): undef +ident(21): EXPN_UP +symbol(22): # +ident(22): ifdef +ident(22): AAPL_NAMESPACE +symbol(23): # +ident(23): endif +ACCEPT +#endif diff --git a/test/clang3.rl b/test/clang3.rl new file mode 100644 index 0000000..d950eb3 --- /dev/null +++ b/test/clang3.rl @@ -0,0 +1,321 @@ +/* + * @LANG: d + * A mini C-like language scanner. + */ + +module clang; + +import std.c.stdio; + +char[] string(char c) +{ + char[] result = new char[2]; + result[0] = c; + result[1] = 0; + return result[0 .. 1]; +} + +class CLang +{ + /* Parsing data. */ + char[] identBuf; + int curLine; + + this() + { + } + + /* State machine operation data. */ + int cs; + + %%{ + machine clang; + + # Function to buffer a character. + action bufChar { + identBuf ~= fc; + } + + # Function to clear the buffer. + action clearBuf { + + identBuf = null; + } + + # Functions to dump tokens as they are matched. 
+ action ident { + printf("ident(%i): %.*s\n", curLine, identBuf); + } + action literal { + printf("literal(%i): %.*s\n", curLine, identBuf); + } + action float { + printf("float(%i): %.*s\n", curLine, identBuf); + } + action int { + printf("int(%i): %.*s\n", curLine, identBuf); + } + action hex { + printf("hex(%i): 0x%.*s\n", curLine, identBuf); + } + action symbol { + printf("symbol(%i): %.*s\n", curLine, identBuf); + } + + # Alpha numberic characters or underscore. + alnumu = alnum | '_'; + + # Alpha charactres or underscore. + alphau = alpha | '_'; + + # Symbols. Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving dump the symbol. + symbol = ( punct - [_'"] ) >clearBuf $bufChar %symbol; + + # Identifier. Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving, dump the identifier. + ident = (alphau . alnumu*) >clearBuf $bufChar %ident; + + # Match single characters inside literal strings. Or match + # an escape sequence. Buffers the charater matched. + sliteralChar = + ( extend - ['\\] ) @bufChar | + ( '\\' . extend @bufChar ); + dliteralChar = + ( extend - ["\\] ) @bufChar | + ( '\\' . extend @bufChar ); + + # Single quote and double quota literals. At the start clear + # the buffer. Upon leaving dump the literal. + sliteral = ('\'' @clearBuf . sliteralChar* . '\'' ) %literal; + dliteral = ('"' @clearBuf . dliteralChar* . '"' ) %literal; + literal = sliteral | dliteral; + + # Whitespace is standard ws, newlines and control codes. + whitespace = any - 0x21..0x7e; + + # Describe both c style comments and c++ style comments. The + # priority bump on tne terminator of the comments brings us + # out of the extend* which matches everything. + ccComment = '//' . extend* $0 . '\n' @1; + cComment = '/*' . extend* $0 . '*/' @1; + + # Match an integer. We don't bother clearing the buf or filling it. + # The float machine overlaps with int and it will do it. + int = digit+ %int; + + # Match a float. 
Upon entering the machine clear the buf, buffer + # characters on every trans and dump the float upon leaving. + float = ( digit+ . '.' . digit+ ) >clearBuf $bufChar %float; + + # Match a hex. Upon entering the hex part, clear the buf, buffer characters + # on every trans and dump the hex on leaving transitions. + hex = '0x' . xdigit+ >clearBuf $bufChar %hex; + + # Or together all the lanuage elements. + fin = ( ccComment | + cComment | + symbol | + ident | + literal | + whitespace | + int | + float | + hex ); + + # Star the language elements. It is critical in this type of application + # that we decrease the priority of out transitions before doing so. This + # is so that when we see 'aa' we stay in the fin machine to match an ident + # of length two and not wrap around to the front to match two idents of + # length one. + clang_main = ( fin $1 %0 )*; + + # This machine matches everything, taking note of newlines. + newline = ( any | '\n' @{ curLine++; } )*; + + # The final fsm is the lexer intersected with the newline machine which + # will count lines for us. Since the newline machine accepts everything, + # the strings accepted is goverened by the clang_main machine, onto which + # the newline machine overlays line counting. + main := clang_main & newline; + }%% + + %% write data noprefix; + + // Initialize the machine. Invokes any init statement blocks. Returns 0 + // if the machine begins in a non-accepting state and 1 if the machine + // begins in an accepting state. + void init( ) + { + curLine = 1; + %% write init; + } + + // Execute the machine on a block of data. Returns -1 if after processing + // the data, the machine is in the error state and can never accept, 0 if + // the machine is in a non-accepting state and 1 if the machine is in an + // accepting state. + void execute( char* _data, int _len ) + { + char *p = _data; + char *pe = _data + _len; + %% write exec; + } + + // Indicate that there is no more data. 
Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ) + { + %% write eof; + if ( cs == error ) + return -1; + if ( cs >= first_final ) + return 1; + return 0; + } +} + +static const int BUFSIZE = 1024; + +void test( char buf[] ) +{ + CLang scanner = new CLang(); + scanner.init(); + scanner.execute( buf.ptr, buf.length ); + if ( scanner.finish() > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); + + return 0; +} + +int main() +{ + test( + "999 0xaAFF99 99.99 /*\n" + "*/ 'lksdj' //\n" + "\"\n" + "\n" + "literal\n" + "\n" + "\n" + "\"0x00aba foobardd.ddsf 0x0.9\n" ); + + test( + "wordwithnum00asdf\n" + "000wordfollowsnum,makes new symbol\n" + "\n" + "finishing early /* unfinished ...\n" ); + + test( + "/*\n" + " * Copyright\n" + " */\n" + "\n" + "/* Aapl.\n" + " */\n" + "\n" + "#define _AAPL_RESIZE_H\n" + "\n" + "#include <assert.h>\n" + "\n" + "#ifdef AAPL_NAMESPACE\n" + "namespace Aapl {\n" + "#endif\n" + "#define LIN_DEFAULT_STEP 256\n" + "#define EXPN_UP( existing, needed ) \\\n" + " need > eng ? (ned<<1) : eing\n" + " \n" + "\n" + "/*@}*/\n" + "#undef EXPN_UP\n" + "#ifdef AAPL_NAMESPACE\n" + "#endif /* _AAPL_RESIZE_H */\n" ); + return 0; +} + +/+ _____OUTPUT_____ +int(1): 999 +hex(1): 0xaAFF99 +float(1): 99.99 +literal(2): lksdj +literal(8): + +literal + + + +hex(8): 0x00aba +ident(8): foobardd +symbol(8): . +ident(8): ddsf +hex(8): 0x0 +symbol(8): . +int(8): 9 +ACCEPT +ident(1): wordwithnum00asdf +int(2): 000 +ident(2): wordfollowsnum +symbol(2): , +ident(2): makes +ident(2): new +ident(2): symbol +ident(4): finishing +ident(4): early +FAIL +symbol(8): # +ident(8): define +ident(8): _AAPL_RESIZE_H +symbol(10): # +ident(10): include +symbol(10): < +ident(10): assert +symbol(10): . 
+ident(10): h +symbol(10): > +symbol(12): # +ident(12): ifdef +ident(12): AAPL_NAMESPACE +ident(13): namespace +ident(13): Aapl +symbol(13): { +symbol(14): # +ident(14): endif +symbol(15): # +ident(15): define +ident(15): LIN_DEFAULT_STEP +int(15): 256 +symbol(16): # +ident(16): define +ident(16): EXPN_UP +symbol(16): ( +ident(16): existing +symbol(16): , +ident(16): needed +symbol(16): ) +symbol(16): \ +ident(17): need +symbol(17): > +ident(17): eng +symbol(17): ? +symbol(17): ( +ident(17): ned +symbol(17): < +symbol(17): < +int(17): 1 +symbol(17): ) +symbol(17): : +ident(17): eing +symbol(21): # +ident(21): undef +ident(21): EXPN_UP +symbol(22): # +ident(22): ifdef +ident(22): AAPL_NAMESPACE +symbol(23): # +ident(23): endif +ACCEPT +++++++++++++++++/ diff --git a/test/cond1.rl b/test/cond1.rl new file mode 100644 index 0000000..620ea5e --- /dev/null +++ b/test/cond1.rl @@ -0,0 +1,68 @@ +/* + * @LANG: indep + */ +bool i; +bool j; +bool k; +%% + +%%{ + machine foo; + + action c1 {i} + action c2 {j} + action c3 {k} + action one { prints " one\n";} + action two { prints " two\n";} + action three { prints " three\n";} + + action seti { if ( fc == '0' ) i = false; else i = true; } + action setj { if ( fc == '0' ) j = false; else j = true; } + action setk { if ( fc == '0' ) k = false; else k = true; } + + action break {fbreak;} + + one = 'a' 'b' when c1 'c' @one; + two = 'a'* 'b' when c2 'c' @two; + three = 'a'+ 'b' when c3 'c' @three; + + main := + [01] @seti + [01] @setj + [01] @setk + ( one | two | three ) '\n' @break; + +}%% + +/* _____INPUT_____ +"000abc\n" +"100abc\n" +"010abc\n" +"110abc\n" +"001abc\n" +"101abc\n" +"011abc\n" +"111abc\n" +_____INPUT_____ */ +/* _____OUTPUT_____ +FAIL + one +ACCEPT + two +ACCEPT + one + two +ACCEPT + three +ACCEPT + one + three +ACCEPT + two + three +ACCEPT + one + two + three +ACCEPT +_____OUTPUT_____ */ diff --git a/test/cond2.rl b/test/cond2.rl new file mode 100644 index 0000000..7593a3f --- /dev/null +++ b/test/cond2.rl @@ 
-0,0 +1,91 @@ +/* + * @LANG: c++ + */ + +#include <iostream> +#include <string.h> +using std::cout; +using std::endl; + +%%{ + machine foo; + + action c1 {i} + action c2 {j} + + action one { cout << " one" << endl;} + action two { cout << " two" << endl;} + + main := ( + [a-z] | + ('\n' when c1 @one) + )* + ('\n' when c2 @two); +}%% + +%% write data noerror; + +void test( int i, int j, char *str ) +{ + int cs = foo_start; + char *p = str; + char *pe = str + strlen( str ); + + cout << "run:" << endl; + %% write exec; + if ( cs >= foo_first_final ) + cout << " success" << endl; + else + cout << " failure" << endl; + cout << endl; +} + +int main() +{ + test( 0, 0, "hi\n\n" ); + test( 1, 0, "hi\n\n" ); + test( 0, 1, "hi\n" ); + test( 0, 1, "hi\n\n" ); + test( 1, 1, "hi\n" ); + test( 1, 1, "hi\n\n" ); + test( 1, 1, "hi\n\nx" ); + return 0; +} + +#ifdef _____OUTPUT_____ +run: + failure + +run: + one + one + failure + +run: + two + success + +run: + two + failure + +run: + one + two + success + +run: + one + two + one + two + success + +run: + one + two + one + two + failure + +#endif diff --git a/test/cond3.rl b/test/cond3.rl new file mode 100644 index 0000000..1847727 --- /dev/null +++ b/test/cond3.rl @@ -0,0 +1,59 @@ +/* + * @LANG: c++ + */ + +#include <iostream> +#include <string.h> +using std::cout; +using std::endl; + +%%{ + machine foo; + + action hit_5 {c == 5} + action done { cout << " done" << endl; } + action inc {c++;} + + # The any* includes '\n' when hit_5 is true, so use guarded concatenation. 
+ main := (any @inc)* :> '\n' when hit_5 @done; +}%% + +%% write data noerror; + +void test( char *str ) +{ + int cs = foo_start; + int c = 0; + char *p = str; + char *pe = str + strlen( str ); + + cout << "run:" << endl; + %% write exec; + if ( cs >= foo_first_final ) + cout << " success" << endl; + else + cout << " failure" << endl; + cout << endl; +} + +int main() +{ + test( "12345\n" ); // success + test( "\n2345\n" ); // success, first newline ignored + test( "1234\n" ); // failure, didn't get 5 chars before newline. + return 0; +} + +#ifdef _____OUTPUT_____ +run: + done + success + +run: + done + success + +run: + failure + +#endif diff --git a/test/cond4.rl b/test/cond4.rl new file mode 100644 index 0000000..380c5ff --- /dev/null +++ b/test/cond4.rl @@ -0,0 +1,54 @@ +/* + * @LANG: c++ + */ + +#include <iostream> +#include <string.h> +using std::cout; +using std::endl; + +%%{ + machine foo; + + action c1 {(cout << "c1 ", true)} + action c2 {(cout << "c2 ", true)} + action c3 {(cout << "c3 ", true)} + action c4 {(cout << "c4 ", true)} + + main := ( + 10 .. 60 when c1 | + 20 .. 40 when c2 | + 30 .. 50 when c3 | + 32 .. 38 when c4 | + 0 .. 
70 )* ${cout << "char: " << (int)*p << endl;}; +}%% + +%% write data noerror nofinal; + +void test( char *str ) +{ + int len = strlen( str ); + int cs = foo_start; + char *p = str, *pe = str+len; + %% write exec; +} + +char data[] = { 5, 15, 25, 31, 35, 39, 45, 55, 65, 0 }; + +int main() +{ + test( data ); + return 0; +} + +#ifdef _____OUTPUT_____ +char: 5 +c1 char: 15 +c1 c2 char: 25 +c1 c2 c3 char: 31 +c1 c2 c3 c4 char: 35 +c1 c2 c3 char: 39 +c1 c3 char: 45 +c1 char: 55 +char: 65 +#endif diff --git a/test/cond5.rl b/test/cond5.rl new file mode 100644 index 0000000..57e3c85 --- /dev/null +++ b/test/cond5.rl @@ -0,0 +1,59 @@ +/* + * @LANG: c++ + */ + +#include <iostream> +#include <string.h> +using std::cout; +using std::endl; + +%%{ + machine foo; + write data noerror; +}%% + +void test( char *str ) +{ + int cs = foo_start; + int c = 0; + char *p = str; + char *pe = str + strlen( str ); + char last = '0'; + + cout << "run:"; + %%{ + action d1 { cout << " d1"; } + action see_five { cout << " see_five"; } + + see_five = ([0-9] when{c++ < 5} @d1)* '\n' @see_five; + + action in_sequence { cout << " in_sequence"; } + action d2 { last = *p; cout << " d2"; } + in_sequence = ( [0-9] when { *p == last+1 } @d2 )* '\n' @in_sequence; + + main := ( see_five | in_sequence ) ${cout << " |";}; + + write exec; + }%% + if ( cs < foo_first_final ) + cout << " failure"; + cout << endl; +} + +int main() +{ + test( "123456789012\n" ); // fails both + test( "123456789\n" ); // fails five + test( "1234\n" ); // fails five + test( "13245\n" ); // fails sequence + test( "12345\n" ); // succeeds in both + return 0; +} + +#ifdef _____OUTPUT_____ +run: d1 d2 | d1 d2 | d1 d2 | d1 d2 | d1 d2 | d2 | d2 | d2 | d2 | failure +run: d1 d2 | d1 d2 | d1 d2 | d1 d2 | d1 d2 | d2 | d2 | d2 | d2 | in_sequence | +run: d1 d2 | d1 d2 | d1 d2 | d1 d2 | see_five in_sequence | +run: d1 d2 | d1 | d1 | d1 | d1 | see_five | +run: d1 d2 | d1 d2 | d1 d2 | d1 d2 | d1 d2 | see_five in_sequence | +#endif diff --git 
a/test/cond6.rl b/test/cond6.rl new file mode 100644 index 0000000..25bf45d --- /dev/null +++ b/test/cond6.rl @@ -0,0 +1,61 @@ +/* + * @LANG: c++ + */ + +/* Balanced parenthesis with conditions. */ + +#include <iostream> +#include <string.h> +using std::cout; +using std::endl; + +%%{ + machine cond; + write data noerror; +}%% + +void test( char *str ) +{ + int cs = cond_start, n = 0; + char *p = str; + char *pe = str + strlen( str ); + + %%{ + comment = '(' @{n=0;} + ( '('@{n++;} | ')'@{n--;} | [^()] )* + :> ')' when{!n}; + + main := ' '* comment ' '* '\n' @{cout << "success";}; + + write exec; + }%% + if ( cs < cond_first_final ) + cout << "failure"; + cout << endl; +} + +int main() +{ + test( "( ( )\n" ); + test( "()()\n" ); + test( "(((\n" ); + test( "((()\n" ); + test( "((())\n" ); + test( "()\n" ); + test( "((()))\n" ); + test( "(()())\n" ); + test( "((())()(((()))))\n" ); + return 0; +} + +#ifdef _____OUTPUT_____ +failure +failure +failure +failure +failure +success +success +success +success +#endif diff --git a/test/cppscan1.h b/test/cppscan1.h new file mode 100644 index 0000000..4497cd2 --- /dev/null +++ b/test/cppscan1.h @@ -0,0 +1,110 @@ +#ifndef _CPPSCAN1_H +#define _CPPSCAN1_H + +#include <iostream> + +using namespace std; + +#define BUFSIZE 2048 + +#define TK_Dlit 192 +#define TK_Slit 193 +#define TK_Float 194 +#define TK_Id 195 +#define TK_NameSep 197 +#define TK_Arrow 211 +#define TK_PlusPlus 212 +#define TK_MinusMinus 213 +#define TK_ArrowStar 214 +#define TK_DotStar 215 +#define TK_ShiftLeft 216 +#define TK_ShiftRight 217 +#define TK_IntegerDecimal 218 +#define TK_IntegerOctal 219 +#define TK_IntegerHex 220 +#define TK_EqualsEquals 223 +#define TK_NotEquals 224 +#define TK_AndAnd 225 +#define TK_OrOr 226 +#define TK_MultAssign 227 +#define TK_DivAssign 228 +#define TK_PercentAssign 229 +#define TK_PlusAssign 230 +#define TK_MinusAssign 231 +#define TK_AmpAssign 232 +#define TK_CaretAssign 233 +#define TK_BarAssign 234 +#define TK_DotDotDot 240 + 
+/* A growable buffer for collecting headers. */ +struct Buffer +{ + Buffer() : data(0), allocated(0), length(0) { } + Buffer( const Buffer &other ) { + data = (char*)malloc( other.allocated ); + memcpy( data, other.data, other.length ); + allocated = other.allocated; + length = other.length; + } + ~Buffer() { empty(); } + + void append( char p ) { + if ( ++length > allocated ) + upAllocate( length*2 ); + data[length-1] = p; + } + void append( char *str, int len ) { + if ( (length += len) > allocated ) + upAllocate( length*2 ); + memcpy( data+length-len, str, len ); + } + + void clear() { length = 0; } + void upAllocate( int len ); + void empty(); + + char *data; + int allocated; + int length; +}; + + +struct Scanner +{ + Scanner( std::ostream &out ) + : out(out) { } + + std::ostream &out; + + int line, col; + int tokStart; + int inlineDepth; + int count; + Buffer tokBuf; + Buffer nonTokBuf; + + void pass(char c) { nonTokBuf.append(c); } + void buf(char c) { tokBuf.append(c); } + void token( int id ); + + int cs, stack, top; + + // Initialize the machine. Invokes any init statement blocks. Returns 0 + // if the machine begins in a non-accepting state and 1 if the machine + // begins in an accepting state. + void init( ); + + // Execute the machine on a block of data. Returns -1 if after processing + // the data, the machine is in the error state and can never accept, 0 if + // the machine is in a non-accepting state and 1 if the machine is in an + // accepting state. + int execute( char *data, int len ); + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. 
+ int finish( ); +}; + +#endif diff --git a/test/cppscan1.rl b/test/cppscan1.rl new file mode 100644 index 0000000..623af28 --- /dev/null +++ b/test/cppscan1.rl @@ -0,0 +1,282 @@ +/* + * @LANG: c++ + * @ALLOW_GENFLAGS: -T0 -T1 -F0 -F1 -G0 -G1 -G2 -P + */ + +#include "cppscan1.h" + +%%{ + machine Scanner; + access fsm->; + + action pass { fsm->pass(fc); } + action buf { fsm->buf(fc); } + + action emit_slit { fsm->token( TK_Slit ); } + action emit_dlit { fsm->token( TK_Dlit ); } + action emit_id { fsm->token( TK_Id ); } + action emit_integer_decimal { fsm->token( TK_IntegerDecimal ); } + action emit_integer_octal { fsm->token( TK_IntegerOctal ); } + action emit_integer_hex { fsm->token( TK_IntegerHex ); } + action emit_float { fsm->token( TK_Float ); } + action emit_symbol { fsm->token( fsm->tokBuf.data[0] ); } + action tokst { fsm->tokStart = fsm->col; } + + # Single and double literals. + slit = ( 'L'? ( "'" ( [^'\\\n] | /\\./ )* "'" ) $buf ) >tokst %emit_slit; + dlit = ( 'L'? ( '"' ( [^"\\\n] | /\\./ )* '"' ) $buf ) >tokst %emit_dlit; + + # Identifiers + id = ( [a-zA-Z_] [a-zA-Z0-9_]* ) >tokst $buf %emit_id; + + # Floating literals. + fract_const = digit* '.' digit+ | digit+ '.'; + exponent = [eE] [+\-]? digit+; + float_suffix = [flFL]; + float = + ( fract_const exponent? float_suffix? | + digit+ exponent float_suffix? ) >tokst $buf %emit_float; + + # Integer decimal. Leading part buffered by float. + integer_decimal = ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} $buf ) %emit_integer_decimal; + + # Integer octal. Leading part buffered by float. + integer_octal = ( '0' [0-9]+ [ulUL]{0,2} $buf ) %emit_integer_octal; + + # Integer hex. Leading 0 buffered by float. + integer_hex = ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) $buf ) %emit_integer_hex; + + # Only buffer the second item, first buffered by symbol. 
*/ + namesep = '::' @buf %{fsm->token( TK_NameSep );}; + deqs = '==' @buf %{fsm->token( TK_EqualsEquals );}; + neqs = '!=' @buf %{fsm->token( TK_NotEquals );}; + and_and = '&&' @buf %{fsm->token( TK_AndAnd );}; + or_or = '||' @buf %{fsm->token( TK_OrOr );}; + mult_assign = '*=' @buf %{fsm->token( TK_MultAssign );}; + percent_assign = '%=' @buf %{fsm->token( TK_PercentAssign );}; + plus_assign = '+=' @buf %{fsm->token( TK_PlusAssign );}; + minus_assign = '-=' @buf %{fsm->token( TK_MinusAssign );}; + amp_assign = '&=' @buf %{fsm->token( TK_AmpAssign );}; + caret_assign = '^=' @buf %{fsm->token( TK_CaretAssign );}; + bar_assign = '|=' @buf %{fsm->token( TK_BarAssign );}; + plus_plus = '++' @buf %{fsm->token( TK_PlusPlus );}; + minus_minus = '--' @buf %{fsm->token( TK_MinusMinus );}; + arrow = '->' @buf %{fsm->token( TK_Arrow );}; + arrow_star = '->*' @buf %{fsm->token( TK_ArrowStar );}; + dot_star = '.*' @buf %{fsm->token( TK_DotStar );}; + + # Buffer both items. * + div_assign = '/=' @{fsm->buf('/');fsm->buf(fc);} %{fsm->token( TK_DivAssign );}; + + # Double dot is sent as two dots. + dot_dot = '..' %{fsm->token('.'); fsm->buf('.'); fsm->token('.');}; + + # Three char compounds, first item already buffered. */ + dot_dot_dot = '...' %{fsm->buf('.'); fsm->buf('.'); fsm->token( TK_DotDotDot );}; + + # All compunds + compound = namesep | deqs | neqs | and_and | or_or | mult_assign | + div_assign | percent_assign | plus_assign | minus_assign | + amp_assign | caret_assign | bar_assign | plus_plus | minus_minus | + arrow | arrow_star | dot_star | dot_dot | dot_dot_dot; + + # Single char symbols. + symbol = + ( punct - [./_"'] ) >tokst $buf %emit_symbol | + # Do not immediately buffer slash, may be start of comment. + '/' >tokst %{ fsm->buf('/'); fsm->token( '/' ); } | + # Dot covered by float. + '.' %emit_symbol; + + # Comments and whitespace. 
+ commc = '/*' @{fsm->pass('/'); fsm->pass('*');} ( any* $0 '*/' @1 ) $pass; + commcc = '//' @{fsm->pass('/'); fsm->pass('/');} ( any* $0 '\n' @1 ) $pass; + whitespace = ( any - ( 0 | 33..126 ) )+ $pass; + + action onEOFChar { + /* On EOF char, write out the non token buffer. */ + fsm->nonTokBuf.append(0); + cout << fsm->nonTokBuf.data; + fsm->nonTokBuf.clear(); + } + + # Using 0 as eof. If seeingAs a result all null characters get ignored. + EOF = 0 @onEOFChar; + + # All outside code tokens. + tokens = ( + id | slit | dlit | float | integer_decimal | + integer_octal | integer_hex | compound | symbol ); + nontok = ( commc | commcc | whitespace | EOF ); + + position = ( + '\n' @{ fsm->line += 1; fsm->col = 1; } | + [^\n] @{ fsm->col += 1; } )*; + + main := ( ( tokens | nontok )** ) & position; +}%% + +%% write data; + +void Scanner::init( ) +{ + Scanner *fsm = this; + /* A count of the number of characters in + * a token. Used for % sequences. */ + count = 0; + line = 1; + col = 1; + + %% write init; +} + +int Scanner::execute( char *data, int len ) +{ + Scanner *fsm = this; + char *p = data; + char *pe = data + len; + + %% write exec; + if ( cs == Scanner_error ) + return -1; + if ( cs >= Scanner_first_final ) + return 1; + return 0; +} + +int Scanner::finish( ) +{ + %% write eof; + if ( cs == Scanner_error ) + return -1; + if ( cs >= Scanner_first_final ) + return 1; + return 0; +} + +void Scanner::token( int id ) +{ + /* Leader. */ + if ( nonTokBuf.length > 0 ) { + nonTokBuf.append(0); + cout << nonTokBuf.data; + nonTokBuf.clear(); + } + + /* Token data. 
*/ + tokBuf.append(0); + cout << '<' << id << '>' << tokBuf.data; + tokBuf.clear(); +} + +void Buffer::empty() +{ + if ( data != 0 ) { + free( data ); + + data = 0; + length = 0; + allocated = 0; + } +} + +void Buffer::upAllocate( int len ) +{ + if ( data == 0 ) + data = (char*) malloc( len ); + else + data = (char*) realloc( data, len ); + allocated = len; +} + +void test( char *buf ) +{ + Scanner scanner(cout); + scanner.init(); + scanner.execute( buf, strlen(buf) ); + + /* The last token is ignored (because there is no next token). Send + * trailing null to force the last token into whitespace. */ + char eof = 0; + if ( scanner.execute( &eof, 1 ) <= 0 ) { + cerr << "cppscan: scan failed" << endl; + return; + } + cout.flush(); +} + +int main() +{ + test( + "/*\n" + " * Copyright \n" + " */\n" + "\n" + "/* Construct an fsmmachine from a graph. */\n" + "RedFsmAp::RedFsmAp( FsmAp *graph, bool complete )\n" + ":\n" + " graph(graph),\n" + "{\n" + " assert( sizeof(RedTransAp) <= sizeof(TransAp) );\n" + "\n" + " reduceMachine();\n" + "}\n" + "\n" + "{\n" + " /* Get the transition that we want to extend. */\n" + " RedTransAp *extendTrans = list[pos].value;\n" + "\n" + " /* Look ahead in the transition list. */\n" + " for ( int next = pos + 1; next < list.length(); pos++, next++ ) {\n" + " if ( ! keyOps->eq( list[pos].highKey, nextKey ) )\n" + " break;\n" + " }\n" + " return false;\n" + "}\n" + "\n" ); + + test( + "->*\n" + ".*\n" + "/*\"*/\n" + "\"/*\"\n" + "L'\"'\n" + "L\"'\"\n" ); + + return 0; +} + +#ifdef _____OUTPUT_____ +/* + * Copyright + */ + +/* Construct an fsmmachine from a graph. */ +<195>RedFsmAp<197>::<195>RedFsmAp<40>( <195>FsmAp <42>*<195>graph<44>, <195>bool <195>complete <41>) +<58>: + <195>graph<40>(<195>graph<41>)<44>, +<123>{ + <195>assert<40>( <195>sizeof<40>(<195>RedTransAp<41>) <60><<61>= <195>sizeof<40>(<195>TransAp<41>) <41>)<59>; + + <195>reduceMachine<40>(<41>)<59>; +<125>} + +<123>{ + /* Get the transition that we want to extend. 
*/ + <195>RedTransAp <42>*<195>extendTrans <61>= <195>list<91>[<195>pos<93>]<46>.<195>value<59>; + + /* Look ahead in the transition list. */ + <195>for <40>( <195>int <195>next <61>= <195>pos <43>+ <218>1<59>; <195>next <60>< <195>list<46>.<195>length<40>(<41>)<59>; <195>pos<212>++<44>, <195>next<212>++ <41>) <123>{ + <195>if <40>( <33>! <195>keyOps<211>-><195>eq<40>( <195>list<91>[<195>pos<93>]<46>.<195>highKey<44>, <195>nextKey <41>) <41>) + <195>break<59>; + <125>} + <195>return <195>false<59>; +<125>} + +<214>->* +<215>.* +/*"*/ +<192>"/*" +<193>L'"' +<192>L"'" +#endif diff --git a/test/cppscan2.rl b/test/cppscan2.rl new file mode 100644 index 0000000..07fc01b --- /dev/null +++ b/test/cppscan2.rl @@ -0,0 +1,402 @@ +/* + * @LANG: c++ + */ + +#include <iostream> +using namespace std; + +#define TK_Dlit 192 +#define TK_Slit 193 +#define TK_Float 194 +#define TK_Id 195 +#define TK_NameSep 197 +#define TK_Arrow 211 +#define TK_PlusPlus 212 +#define TK_MinusMinus 213 +#define TK_ArrowStar 214 +#define TK_DotStar 215 +#define TK_ShiftLeft 216 +#define TK_ShiftRight 217 +#define TK_IntegerDecimal 218 +#define TK_IntegerOctal 219 +#define TK_IntegerHex 220 +#define TK_EqualsEquals 223 +#define TK_NotEquals 224 +#define TK_AndAnd 225 +#define TK_OrOr 226 +#define TK_MultAssign 227 +#define TK_DivAssign 228 +#define TK_PercentAssign 229 +#define TK_PlusAssign 230 +#define TK_MinusAssign 231 +#define TK_AmpAssign 232 +#define TK_CaretAssign 233 +#define TK_BarAssign 234 +#define TK_DotDotDot 240 +#define TK_Whitespace 241 +#define TK_Comment 242 + +#define BUFSIZE 4096 + +int tok; +char buf[BUFSIZE], *tokstart, *tokend; +void token( char *data, int len ); +bool discard = false; + +struct Scanner +{ + int cs; + + // Initialize the machine. Invokes any init statement blocks. Returns 0 + // if the machine begins in a non-accepting state and 1 if the machine + // begins in an accepting state. + int init( ); + + // Execute the machine on a block of data. 
Returns -1 if after processing + // the data, the machine is in the error state and can never accept, 0 if + // the machine is in a non-accepting state and 1 if the machine is in an + // accepting state. + int execute( char *data, int len ); + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ); +}; + +%%{ + machine Scanner; + + # Single and double literals. + slit = ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) @{tok = TK_Slit;}; + dlit = ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) @{tok = TK_Dlit;}; + + # Identifiers + id = ( [a-zA-Z_] [a-zA-Z0-9_]* ) @{tok = TK_Id;}; + + # Floating literals. + fract_const = digit* '.' digit+ | digit+ '.'; + exponent = [eE] [+\-]? digit+; + float_suffix = [flFL]; + float = + ( fract_const exponent? float_suffix? | + digit+ exponent float_suffix? ) @{tok = TK_Float;}; + + # Integer decimal. Leading part buffered by float. + integer_decimal = ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) @{tok = TK_IntegerDecimal;}; + + # Integer octal. Leading part buffered by float. + integer_octal = ( '0' [0-9]+ [ulUL]{0,2} ) @{tok = TK_IntegerOctal;}; + + # Integer hex. Leading 0 buffered by float. + integer_hex = ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) ) @{tok = TK_IntegerHex;}; + + # Only buffer the second item, first buffered by symbol. 
*/ + namesep = '::' @{tok = TK_NameSep;}; + deqs = '==' @{tok = TK_EqualsEquals;}; + neqs = '!=' @{tok = TK_NotEquals;}; + and_and = '&&' @{tok = TK_AndAnd;}; + or_or = '||' @{tok = TK_OrOr;}; + mult_assign = '*=' @{tok = TK_MultAssign;}; + div_assign = '/=' @{tok = TK_DivAssign;}; + percent_assign = '%=' @{tok = TK_PercentAssign;}; + plus_assign = '+=' @{tok = TK_PlusAssign;}; + minus_assign = '-=' @{tok = TK_MinusAssign;}; + amp_assign = '&=' @{tok = TK_AmpAssign;}; + caret_assign = '^=' @{tok = TK_CaretAssign;}; + bar_assign = '|=' @{tok = TK_BarAssign;}; + plus_plus = '++' @{tok = TK_PlusPlus;}; + minus_minus = '--' @{tok = TK_MinusMinus;}; + arrow = '->' @{tok = TK_Arrow;}; + arrow_star = '->*' @{tok = TK_ArrowStar;}; + dot_star = '.*' @{tok = TK_DotStar;}; + + # Three char compounds, first item already buffered. */ + dot_dot_dot = '...' @{tok = TK_DotDotDot;}; + + # All compunds + compound = namesep | deqs | neqs | and_and | or_or | mult_assign | + div_assign | percent_assign | plus_assign | minus_assign | + amp_assign | caret_assign | bar_assign | plus_plus | minus_minus | + arrow | arrow_star | dot_star | dot_dot_dot; + + # Single char symbols. + symbol = ( punct - [_"'] ) @{tok = fc;}; + + action discard { + discard = true; + } + + # Comments and whitespace. + commc = '/*' @discard ( any* $0 '*/' @1 ) @{tok = TK_Comment;}; + commcc = '//' @discard ( any* $0 '\n' @1 ) @{tok = TK_Comment;}; + whitespace = ( any - 33..126 )+ >discard @{tok = TK_Whitespace;}; + + # All outside code tokens. + tokens = ( + id | slit | dlit | float | integer_decimal | + integer_octal | integer_hex | compound | symbol | + commc | commcc | whitespace ); + + action onError { + if ( tok != 0 ) { + char *rst_data; + + if ( tok == TK_Comment || tok == TK_Whitespace ) { + /* Reset comment status, don't send. */ + discard = false; + + /* Restart right at the error point if consuming whitespace or + * a comment. Consume may have spanned multiple buffers. 
*/ + rst_data = fpc; + } + else { + /* Send the token. */ + token( tokstart, tokend - tokstart + 1 ); + + /* Restart right after the token. */ + rst_data = tokend+1; + } + + tokstart = 0; + fexec rst_data; + fgoto main; + } + } + + main := tokens >{tokstart=fpc;} @{tokend=fpc;} $!onError; +}%% + +%% write data; + +int Scanner::init( ) +{ + tok = 0; + tokstart = 0; + tokend = 0; + + %% write init; + return 1; +} + +int Scanner::execute( char *data, int len ) +{ + char *p = data; + char *pe = data + len; + + %% write exec; + + if ( cs == Scanner_error ) + return -1; + if ( cs >= Scanner_first_final ) + return 1; + return 0; +} + +int Scanner::finish( ) +{ + %% write eof; + if ( cs == Scanner_error ) + return -1; + if ( cs >= Scanner_first_final ) + return 1; + return 0; +} + + +void token( char *data, int len ) +{ + cout << "<" << tok << "> "; + for ( int i = 0; i < len; i++ ) + cout << data[i]; + cout << '\n'; +} + +void test( char * data ) +{ + Scanner scanner; + scanner.init(); + scanner.execute( data, strlen(data) ); + scanner.finish(); + if ( tok != 0 && tok != TK_Comment && tok != TK_Whitespace ) + token( tokstart, tokend - tokstart + 1 ); +} + +int main() +{ + test( + "/*\n" + " * Copyright \n" + " */\n" + "\n" + "\n" + "/* Move ranges to the singles list. 
*/\n" + "void RedFsmAp::move( RedStateAp *state )\n" + "{\n" + " RedTranst &range = state->outRange;\n" + " for ( int rpos = 0; rpos < range.length(); ) {\n" + " if ( can( range, rpos ) ) {\n" + " while ( range[rpos].value != range[rpos+1].value ) {\n" + " single.append( range[rpos+1] );\n" + " }\n" + " \n" + " range[rpos].highKey = range[rpos+1].highKey;\n" + " }\n" + " else if ( keyOps->span( range[rpos].lowKey, range[rpos].highKey ) == 1 ) {\n" + " single.append( range[rpos] );\n" + " }\n" + " }\n" + "}\n" + "\n" ); + + test( + "->*\n" + ".*\n" + "/*\"*/\n" + "\"/*\"\n" + "L'\"'\n" + "L\"'\"\n" + "...\n" ); +} + +#ifdef _____OUTPUT_____ +<195> void +<195> RedFsmAp +<197> :: +<195> move +<40> ( +<195> RedStateAp +<42> * +<195> state +<41> ) +<123> { +<195> RedTranst +<38> & +<195> range +<61> = +<195> state +<211> -> +<195> outRange +<59> ; +<195> for +<40> ( +<195> int +<195> rpos +<61> = +<218> 0 +<59> ; +<195> rpos +<60> < +<195> range +<46> . +<195> length +<40> ( +<41> ) +<59> ; +<41> ) +<123> { +<195> if +<40> ( +<195> can +<40> ( +<195> range +<44> , +<195> rpos +<41> ) +<41> ) +<123> { +<195> while +<40> ( +<195> range +<91> [ +<195> rpos +<93> ] +<46> . +<195> value +<224> != +<195> range +<91> [ +<195> rpos +<43> + +<218> 1 +<93> ] +<46> . +<195> value +<41> ) +<123> { +<195> single +<46> . +<195> append +<40> ( +<195> range +<91> [ +<195> rpos +<43> + +<218> 1 +<93> ] +<41> ) +<59> ; +<125> } +<195> range +<91> [ +<195> rpos +<93> ] +<46> . +<195> highKey +<61> = +<195> range +<91> [ +<195> rpos +<43> + +<218> 1 +<93> ] +<46> . +<195> highKey +<59> ; +<125> } +<195> else +<195> if +<40> ( +<195> keyOps +<211> -> +<195> span +<40> ( +<195> range +<91> [ +<195> rpos +<93> ] +<46> . +<195> lowKey +<44> , +<195> range +<91> [ +<195> rpos +<93> ] +<46> . +<195> highKey +<41> ) +<223> == +<218> 1 +<41> ) +<123> { +<195> single +<46> . 
+<195> append +<40> ( +<195> range +<91> [ +<195> rpos +<93> ] +<41> ) +<59> ; +<125> } +<125> } +<125> } +<214> ->* +<215> .* +<192> "/*" +<193> L'"' +<192> L"'" +<240> ... +#endif diff --git a/test/cppscan3.rl b/test/cppscan3.rl new file mode 100644 index 0000000..afe832d --- /dev/null +++ b/test/cppscan3.rl @@ -0,0 +1,281 @@ +/* + * @LANG: c++ + */ + +#include <iostream> +#include <string.h> +using namespace std; + +#define TK_Dlit 192 +#define TK_Slit 193 +#define TK_Float 194 +#define TK_Id 195 +#define TK_NameSep 197 +#define TK_Arrow 211 +#define TK_PlusPlus 212 +#define TK_MinusMinus 213 +#define TK_ArrowStar 214 +#define TK_DotStar 215 +#define TK_ShiftLeft 216 +#define TK_ShiftRight 217 +#define TK_IntegerDecimal 218 +#define TK_IntegerOctal 219 +#define TK_IntegerHex 220 +#define TK_EqualsEquals 223 +#define TK_NotEquals 224 +#define TK_AndAnd 225 +#define TK_OrOr 226 +#define TK_MultAssign 227 +#define TK_DivAssign 228 +#define TK_PercentAssign 229 +#define TK_PlusAssign 230 +#define TK_MinusAssign 231 +#define TK_AmpAssign 232 +#define TK_CaretAssign 233 +#define TK_BarAssign 234 +#define TK_DotDotDot 240 +#define TK_Whitespace 241 +#define TK_Comment 242 + +#define BUFSIZE 4096 + +char buf[BUFSIZE]; + +struct Scanner +{ + int cs, act; + char *tokstart, *tokend; + + void token( int tok ); + void run(); + + void init( ); + void execute( char *data, int len ); + int finish( ); +}; + +%%{ + machine Scanner; + + main := |* + + # Single and double literals. + ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) + => { token( TK_Slit );}; + ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) + => { token( TK_Dlit );}; + + # Identifiers + ( [a-zA-Z_] [a-zA-Z0-9_]* ) + =>{ token( TK_Id );}; + + # Floating literals. + fract_const = digit* '.' digit+ | digit+ '.'; + exponent = [eE] [+\-]? digit+; + float_suffix = [flFL]; + + ( fract_const exponent? float_suffix? | + digit+ exponent float_suffix? ) + => { token( TK_Float );}; + + # Integer decimal. Leading part buffered by float. 
+ ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) + => { token( TK_IntegerDecimal );}; + + # Integer octal. Leading part buffered by float. + ( '0' [0-9]+ [ulUL]{0,2} ) + => { token( TK_IntegerOctal );}; + + # Integer hex. Leading 0 buffered by float. + ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) ) + => { token( TK_IntegerHex );}; + + # Only buffer the second item, first buffered by symbol. */ + '::' => {token( TK_NameSep );}; + '==' => {token( TK_EqualsEquals );}; + '!=' => {token( TK_NotEquals );}; + '&&' => {token( TK_AndAnd );}; + '||' => {token( TK_OrOr );}; + '*=' => {token( TK_MultAssign );}; + '/=' => {token( TK_DivAssign );}; + '%=' => {token( TK_PercentAssign );}; + '+=' => {token( TK_PlusAssign );}; + '-=' => {token( TK_MinusAssign );}; + '&=' => {token( TK_AmpAssign );}; + '^=' => {token( TK_CaretAssign );}; + '|=' => {token( TK_BarAssign );}; + '++' => {token( TK_PlusPlus );}; + '--' => {token( TK_MinusMinus );}; + '->' => {token( TK_Arrow );}; + '->*' => {token( TK_ArrowStar );}; + '.*' => {token( TK_DotStar );}; + + # Three char compounds, first item already buffered. */ + '...' => { token( TK_DotDotDot );}; + + # Single char symbols. + ( punct - [_"'] ) => { token( tokstart[0] );}; + + action comment { + token( TK_Comment ); + } + + # Comments and whitespace. 
+ '/*' ( any* $0 '*/' @1 ) => comment; + '//' ( any* $0 '\n' @1 ) => comment; + ( any - 33..126 )+ => { token( TK_Whitespace );}; + + *|; +}%% + +%% write data; + +void Scanner::init( ) +{ + %% write init; +} + +/* Returns the count of bytes still in the buffer + * (shifted to the biginning) */ +void Scanner::execute( char *data, int len ) +{ + char *p = data; + char *pe = data + len; + + %% write exec; +} + +int Scanner::finish( ) +{ + %% write eof; + if ( cs == Scanner_error ) + return -1; + if ( cs >= Scanner_first_final ) + return 1; + return 0; +} + + +void Scanner::token( int tok ) +{ + const char *data = tokstart; + int len = tokend - tokstart; + cout << "<" << tok << "> "; + for ( int i = 0; i < len; i++ ) + cout << data[i]; + cout << '\n'; +} + +void test( char *buf ) +{ + int len = strlen( buf ); + std::ios::sync_with_stdio(false); + Scanner scanner; + scanner.init(); + + scanner.execute( buf, len ); + if ( scanner.cs == Scanner_error ) { + /* Machine failed before finding a token. */ + cout << "PARSE ERROR" << endl; + } + + /* FIXME: Last token may get lost. */ + scanner.finish(); +} + +int main() +{ + test( + "\"\\\"hi\" /*\n" + "*/\n" + "44 .44\n" + "44. 44\n" + "44 . 44\n" + "44.44\n" + "_hithere22\n" + "\n" + ); + + test( + "'\\''\"\\n\\d'\\\"\"\n" + "hi\n" + "99\n" + ".99\n" + "99e-4\n" + "->*\n" + "||\n" + "0x98\n" + "0x\n" + "//\n" + "/* * */\n" + ); + + test( + "'\n" + "'\n" + ); + +} + +#ifdef _____OUTPUT_____ +<192> "\"hi" +<241> +<242> /* +*/ +<241> + +<218> 44 +<241> +<194> .44 +<241> + +<194> 44. +<241> +<218> 44 +<241> + +<218> 44 +<241> +<46> . 
+<241> +<218> 44 +<241> + +<194> 44.44 +<241> + +<195> _hithere22 +<193> '\'' +<192> "\n\d'\"" +<241> + +<195> hi +<241> + +<218> 99 +<241> + +<194> .99 +<241> + +<194> 99e-4 +<241> + +<214> ->* +<241> + +<226> || +<241> + +<220> 0x98 +<241> + +<218> 0 +<195> x +<241> + +<242> // + +<242> /* * */ +PARSE ERROR +#endif diff --git a/test/cppscan4.rl b/test/cppscan4.rl new file mode 100644 index 0000000..fa7499f --- /dev/null +++ b/test/cppscan4.rl @@ -0,0 +1,303 @@ +/* + * @LANG: d + */ + +module cppscan; + +import std.c.stdio; +import std.string; + +const int BUFSIZE = 2048; + +const int TK_Dlit = 192; +const int TK_Slit = 193; +const int TK_Float = 194; +const int TK_Id = 195; +const int TK_NameSep = 197; +const int TK_Arrow = 211; +const int TK_PlusPlus = 212; +const int TK_MinusMinus = 213; +const int TK_ArrowStar = 214; +const int TK_DotStar = 215; +const int TK_ShiftLeft = 216; +const int TK_ShiftRight = 217; +const int TK_IntegerDecimal = 218; +const int TK_IntegerOctal = 219; +const int TK_IntegerHex = 220; +const int TK_EqualsEquals = 223; +const int TK_NotEquals = 224; +const int TK_AndAnd = 225; +const int TK_OrOr = 226; +const int TK_MultAssign = 227; +const int TK_DivAssign = 228; +const int TK_PercentAssign = 229; +const int TK_PlusAssign = 230; +const int TK_MinusAssign = 231; +const int TK_AmpAssign = 232; +const int TK_CaretAssign = 233; +const int TK_BarAssign = 234; +const int TK_DotDotDot = 240; + + +class Scanner +{ + int line, col; + int tokStart; + int inlineDepth; + int count; + char[] tokBuf; + char[] nonTokBuf; + + void pass(char c) { nonTokBuf ~= c; } + void buf(char c) { tokBuf ~= c; } + void token( int id ) + { + /* Leader. */ + if ( nonTokBuf.length > 0 ) { + printf("%.*s", nonTokBuf); + nonTokBuf = ""; + } + + /* Token data. 
*/ + printf("<%d>%.*s", id, tokBuf); + + tokBuf = ""; + } + + int cs, stack, top; + + %%{ + machine Scanner; + + action pass { pass(fc); } + action buf { buf(fc); } + + action emit_slit { token( TK_Slit ); } + action emit_dlit { token( TK_Dlit ); } + action emit_id { token( TK_Id ); } + action emit_integer_decimal { token( TK_IntegerDecimal ); } + action emit_integer_octal { token( TK_IntegerOctal ); } + action emit_integer_hex { token( TK_IntegerHex ); } + action emit_float { token( TK_Float ); } + action emit_symbol { token( tokBuf[0] ); } + action tokst { tokStart = col; } + + # Single and double literals. + slit = ( 'L'? ( "'" ( [^'\\\n] | /\\./ )* "'" ) $buf ) >tokst %emit_slit; + dlit = ( 'L'? ( '"' ( [^"\\\n] | /\\./ )* '"' ) $buf ) >tokst %emit_dlit; + + # Identifiers + id = ( [a-zA-Z_] [a-zA-Z0-9_]* ) >tokst $buf %emit_id; + + # Floating literals. + fract_const = digit* '.' digit+ | digit+ '.'; + exponent = [eE] [+\-]? digit+; + float_suffix = [flFL]; + float = + ( fract_const exponent? float_suffix? | + digit+ exponent float_suffix? ) >tokst $buf %emit_float; + + # Integer decimal. Leading part buffered by float. + integer_decimal = ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} $buf ) %emit_integer_decimal; + + # Integer octal. Leading part buffered by float. + integer_octal = ( '0' [0-9]+ [ulUL]{0,2} $buf ) %emit_integer_octal; + + # Integer hex. Leading 0 buffered by float. + integer_hex = ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) $buf ) %emit_integer_hex; + + # Only buffer the second item, first buffered by symbol. 
*/ + namesep = '::' @buf %{token( TK_NameSep );}; + deqs = '==' @buf %{token( TK_EqualsEquals );}; + neqs = '!=' @buf %{token( TK_NotEquals );}; + and_and = '&&' @buf %{token( TK_AndAnd );}; + or_or = '||' @buf %{token( TK_OrOr );}; + mult_assign = '*=' @buf %{token( TK_MultAssign );}; + percent_assign = '%=' @buf %{token( TK_PercentAssign );}; + plus_assign = '+=' @buf %{token( TK_PlusAssign );}; + minus_assign = '-=' @buf %{token( TK_MinusAssign );}; + amp_assign = '&=' @buf %{token( TK_AmpAssign );}; + caret_assign = '^=' @buf %{token( TK_CaretAssign );}; + bar_assign = '|=' @buf %{token( TK_BarAssign );}; + plus_plus = '++' @buf %{token( TK_PlusPlus );}; + minus_minus = '--' @buf %{token( TK_MinusMinus );}; + arrow = '->' @buf %{token( TK_Arrow );}; + arrow_star = '->*' @buf %{token( TK_ArrowStar );}; + dot_star = '.*' @buf %{token( TK_DotStar );}; + + # Buffer both items. * + div_assign = '/=' @{buf('/');buf(fc);} %{token( TK_DivAssign );}; + + # Double dot is sent as two dots. + dot_dot = '..' %{token('.'); buf('.'); token('.');}; + + # Three char compounds, first item already buffered. */ + dot_dot_dot = '...' %{buf('.'); buf('.'); token( TK_DotDotDot );}; + + # All compunds + compound = namesep | deqs | neqs | and_and | or_or | mult_assign | + div_assign | percent_assign | plus_assign | minus_assign | + amp_assign | caret_assign | bar_assign | plus_plus | minus_minus | + arrow | arrow_star | dot_star | dot_dot | dot_dot_dot; + + # Single char symbols. + symbol = + ( punct - [./_"'] ) >tokst $buf %emit_symbol | + # Do not immediately buffer slash, may be start of comment. + '/' >tokst %{ buf('/'); token( '/' ); } | + # Dot covered by float. + '.' %emit_symbol; + + # Comments and whitespace. + commc = '/*' @{pass('/'); pass('*');} ( any* $0 '*/' @1 ) $pass; + commcc = '//' @{pass('/'); pass('/');} ( any* $0 '\n' @1 ) $pass; + whitespace = ( any - ( 0 | 33..126 ) )+ $pass; + + action onEOFChar { + /* On EOF char, write out the non token buffer. 
*/ + printf("%.*s", nonTokBuf); + nonTokBuf = ""; + } + + # Using 0 as eof. If seeingAs a result all null characters get ignored. + EOF = 0 @onEOFChar; + + # All outside code tokens. + tokens = ( + id | slit | dlit | float | integer_decimal | + integer_octal | integer_hex | compound | symbol ); + nontok = ( commc | commcc | whitespace | EOF ); + + position = ( + '\n' @{ line += 1; col = 1; } | + [^\n] @{ col += 1; } )*; + + main := ( ( tokens | nontok )** ) & position; + }%% + + %% write data noprefix; + + void init( ) + { + /* A count of the number of characters in + * a token. Used for % sequences. */ + count = 0; + line = 1; + col = 1; + %% write init; + return 1; + } + + int execute( char* _data, int _len ) + { + char *p = _data; + char *pe = _data + _len; + + %% write exec; + + if ( cs == error ) + return -1; + if ( cs >= first_final ) + return 1; + return 0; + } + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ) + { + %% write eof; + + if ( cs == error ) + return -1; + if ( cs >= first_final ) + return 1; + return 0; + } +}; + +void test(char[] buf) +{ + Scanner scanner = new Scanner(); + scanner.init(); + scanner.execute( buf.ptr, buf.length ); + + /* The last token is ignored (because there is no next token). Send + * trailing null to force the last token into whitespace. 
*/ + char eof_char = 0; + if ( scanner.execute( &eof_char, 1 ) <= 0 ) { + fprintf(stderr, "cppscan: scan failed\n"); + } +} + +int main() +{ + test( + "/*\n" + " * Copyright \n" + " */\n" + "\n" + "RedTransAp *RedFsmAp::reduceTrans( TransAp *trans )\n" + "{\n" + " RedAction *action = 0;\n" + " if ( trans->actionTable.length() > 0 ) {\n" + " if ( actionMap.insert( trans->actionTable, &action ) )\n" + " action->id = nextActionId++;\n" + " }\n" + " \n" + " RedStateAp *targ = (RedStateAp*)trans->toState;\n" + " if ( action == 0 ) {\n" + " delete trans;\n" + " return 0;\n" + " }\n" + "\n" + " trans->~TransAp();\n" + " inDict = new(trans) RedTransAp( targ, action, nextTransId++ );\n" + " transSet.insert( inDict );\n" + "}\n" + ); + + test( + "->*\n" + ".*\n" + "/*\"*/\n" + "\"/*\"\n" + "L'\"'\n" + "L\"'\"\n" + ); + + return 0; +} + +/+ _____OUTPUT_____ +/* + * Copyright + */ + +<195>RedTransAp <42>*<195>RedFsmAp<197>::<195>reduceTrans<40>( <195>TransAp <42>*<195>trans <41>) +<123>{ + <195>RedAction <42>*<195>action <61>= <218>0<59>; + <195>if <40>( <195>trans<211>-><195>actionTable<46>.<195>length<40>(<41>) <62>> <218>0 <41>) <123>{ + <195>if <40>( <195>actionMap<46>.<195>insert<40>( <195>trans<211>-><195>actionTable<44>, <38>&<195>action <41>) <41>) + <195>action<211>-><195>id <61>= <195>nextActionId<212>++<59>; + <125>} + + <195>RedStateAp <42>*<195>targ <61>= <40>(<195>RedStateAp<42>*<41>)<195>trans<211>-><195>toState<59>; + <195>if <40>( <195>action <223>== <218>0 <41>) <123>{ + <195>delete <195>trans<59>; + <195>return <218>0<59>; + <125>} + + <195>trans<211>-><126>~<195>TransAp<40>(<41>)<59>; + <195>inDict <61>= <195>new<40>(<195>trans<41>) <195>RedTransAp<40>( <195>targ<44>, <195>action<44>, <195>nextTransId<212>++ <41>)<59>; + <195>transSet<46>.<195>insert<40>( <195>inDict <41>)<59>; +<125>} +<214>->* +<215>.* +/*"*/ +<192>"/*" +<193>L'"' +<192>L"'" ++++++++++++++++++/ diff --git a/test/cppscan5.rl b/test/cppscan5.rl new file mode 100644 index 0000000..3c0035b 
--- /dev/null +++ b/test/cppscan5.rl @@ -0,0 +1,277 @@ +/* + * @LANG: d + */ + +/* + * Test in and out state actions. + */ + +import std.c.stdio; +import std.string; + +static const int TK_Dlit = 192; +static const int TK_Slit = 193; +static const int TK_Float = 194; +static const int TK_Id = 195; +static const int TK_NameSep = 197; +static const int TK_Arrow = 211; +static const int TK_PlusPlus = 212; +static const int TK_MinusMinus = 213; +static const int TK_ArrowStar = 214; +static const int TK_DotStar = 215; +static const int TK_ShiftLeft = 216; +static const int TK_ShiftRight = 217; +static const int TK_IntegerDecimal = 218; +static const int TK_IntegerOctal = 219; +static const int TK_IntegerHex = 220; +static const int TK_EqualsEquals = 223; +static const int TK_NotEquals = 224; +static const int TK_AndAnd = 225; +static const int TK_OrOr = 226; +static const int TK_MultAssign = 227; +static const int TK_DivAssign = 228; +static const int TK_PercentAssign = 229; +static const int TK_PlusAssign = 230; +static const int TK_MinusAssign = 231; +static const int TK_AmpAssign = 232; +static const int TK_CaretAssign = 233; +static const int TK_BarAssign = 234; +static const int TK_DotDotDot = 240; +static const int TK_Whitespace = 241; +static const int TK_Comment = 242; + +class Scanner +{ + int cs, act; + char *tokstart, tokend; + + void token( int tok ) + { + char *data = tokstart; + int len = tokend - tokstart; + printf( "<%i> ", tok ); + for ( int i = 0; i < len; i++ ) + printf( "%c", data[i] ); + printf( "\n" ); + } + + %%{ + + machine Scanner; + + main := |* + + # Single and double literals. + ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) + => { token( TK_Slit );}; + ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) + => { token( TK_Dlit );}; + + # Identifiers + ( [a-zA-Z_] [a-zA-Z0-9_]* ) + =>{ token( TK_Id );}; + + # Floating literals. + fract_const = digit* '.' digit+ | digit+ '.'; + exponent = [eE] [+\-]? digit+; + float_suffix = [flFL]; + + ( fract_const exponent? 
float_suffix? | + digit+ exponent float_suffix? ) + => { token( TK_Float );}; + + # Integer decimal. Leading part buffered by float. + ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) + => { token( TK_IntegerDecimal );}; + + # Integer octal. Leading part buffered by float. + ( '0' [0-9]+ [ulUL]{0,2} ) + => { token( TK_IntegerOctal );}; + + # Integer hex. Leading 0 buffered by float. + ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) ) + => { token( TK_IntegerHex );}; + + # Only buffer the second item, first buffered by symbol. */ + '::' => {token( TK_NameSep );}; + '==' => {token( TK_EqualsEquals );}; + '!=' => {token( TK_NotEquals );}; + '&&' => {token( TK_AndAnd );}; + '||' => {token( TK_OrOr );}; + '*=' => {token( TK_MultAssign );}; + '/=' => {token( TK_DivAssign );}; + '%=' => {token( TK_PercentAssign );}; + '+=' => {token( TK_PlusAssign );}; + '-=' => {token( TK_MinusAssign );}; + '&=' => {token( TK_AmpAssign );}; + '^=' => {token( TK_CaretAssign );}; + '|=' => {token( TK_BarAssign );}; + '++' => {token( TK_PlusPlus );}; + '--' => {token( TK_MinusMinus );}; + '->' => {token( TK_Arrow );}; + '->*' => {token( TK_ArrowStar );}; + '.*' => {token( TK_DotStar );}; + + # Three char compounds, first item already buffered. */ + '...' => { token( TK_DotDotDot );}; + + # Single char symbols. + ( punct - [_"'] ) => { token( tokstart[0] );}; + + action comment { + token( TK_Comment ); + } + + # Comments and whitespace. + '/*' ( any* $0 '*/' @1 ) => comment; + '//' ( any* $0 '\n' @1 ) => comment; + ( any - 33..126 )+ => { token( TK_Whitespace );}; + + *|; + + }%% + + %% write data noprefix; + + void init( ) + { + %% write init; + } + + void execute( char* data, int len ) + { + char *p = data; + char *pe = data + len; + + %% write exec; + } + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. 
+ int finish( ) + { + %% write eof; + + if ( cs == error ) + return -1; + if ( cs >= first_final ) + return 1; + return 0; + } +}; + +static const int BUFSIZE = 12; + +void test( char buf[] ) +{ + Scanner scanner = new Scanner(); + scanner.init(); + + scanner.execute( buf.ptr, buf.length ); + if ( scanner.cs == Scanner.error ) { + /* Machine failed before finding a token. */ + printf("PARSE ERROR\n"); + } + scanner.finish(); + return 0; +} + +int main() +{ + test( + "\"\\\"hi\" /*\n" + "*/\n" + "44 .44\n" + "44. 44\n" + "44 . 44\n" + "44.44\n" + "_hithere22\n" + "\n" + ); + + test( + "'\\''\"\\n\\d'\\\"\"\n" + "hi\n" + "99\n" + ".99\n" + "99e-4\n" + "->*\n" + "||\n" + "0x98\n" + "0x\n" + "//\n" + "/* * */\n" + ); + + test( + "'\n" + "'\n" + ); + + return 0; +} + +/+ _____OUTPUT_____ +<192> "\"hi" +<241> +<242> /* +*/ +<241> + +<218> 44 +<241> +<194> .44 +<241> + +<194> 44. +<241> +<218> 44 +<241> + +<218> 44 +<241> +<46> . +<241> +<218> 44 +<241> + +<194> 44.44 +<241> + +<195> _hithere22 +<193> '\'' +<192> "\n\d'\"" +<241> + +<195> hi +<241> + +<218> 99 +<241> + +<194> .99 +<241> + +<194> 99e-4 +<241> + +<214> ->* +<241> + +<226> || +<241> + +<220> 0x98 +<241> + +<218> 0 +<195> x +<241> + +<242> // + +<242> /* * */ +PARSE ERROR ++++++++++++++++++++/ diff --git a/test/element1.rl b/test/element1.rl new file mode 100644 index 0000000..3113058 --- /dev/null +++ b/test/element1.rl @@ -0,0 +1,108 @@ +/* + * @LANG: c++ + */ + +#include <iostream> +using namespace std; + +struct LangEl +{ + int key; + char *name; +}; + +struct Fsm +{ + int cs; + + // Initialize the machine. Invokes any init statement blocks. Returns 0 + // if the machine begins in a non-accepting state and 1 if the machine + // begins in an accepting state. + int init( ); + + // Execute the machine on a block of data. 
Returns -1 if after processing + // the data, the machine is in the error state and can never accept, 0 if + // the machine is in a non-accepting state and 1 if the machine is in an + // accepting state. + int execute( LangEl *data, int len ); + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ); + +}; + +%%{ + machine Fsm; + + alphtype int; + getkey fpc->key; + + action a1 {} + action a2 {} + action a3 {} + + main := ( 1 2* 3 ) + ${cout << fpc->name << endl;} + %/{cout << "accept" << endl;}; +}%% + +%% write data; + +int Fsm::init( ) +{ + %% write init; + return 0; +} + +int Fsm::execute( LangEl *_data, int _len ) +{ + LangEl *p = _data; + LangEl *pe = _data+_len; + %% write exec; + + if ( cs == Fsm_error ) + return -1; + if ( cs >= Fsm_first_final ) + return 1; + return 0; +} + +int Fsm::finish( ) +{ + %% write eof; + + if ( cs == Fsm_error ) + return -1; + if ( cs >= Fsm_first_final ) + return 1; + return 0; +} + +int main( ) +{ + static Fsm fsm; + static LangEl lel[] = { + {1, "one"}, + {2, "two-a"}, + {2, "two-b"}, + {2, "two-c"}, + {3, "three"} + }; + + fsm.init(); + fsm.execute( lel, 5 ); + fsm.finish(); + return 0; +} + +#ifdef _____OUTPUT_____ +one +two-a +two-b +two-c +three +accept +#endif diff --git a/test/element2.rl b/test/element2.rl new file mode 100644 index 0000000..55f7610 --- /dev/null +++ b/test/element2.rl @@ -0,0 +1,84 @@ +/* + * @LANG: c + */ + +#include <stdio.h> + +struct LangEl +{ + int key; + char *name; +}; + +struct fsm +{ + int cs; +}; + +%%{ + machine fsm; + alphtype int; + getkey fpc->key; + variable curstate fsm->cs; + + action a1 {} + action a2 {} + action a3 {} + + main := ( 1 2* 3 ) + ${printf("%s\n", fpc->name);} + %/{printf("accept\n");}; +}%% + +%% write data; + +void fsm_init( struct fsm *fsm ) +{ + %% write init; +} + 
+void fsm_execute( struct fsm *fsm, struct LangEl *_data, int _len ) +{ + struct LangEl *p = _data; + struct LangEl *pe = _data+_len; + + %% write exec; +} + +int fsm_finish( struct fsm *fsm ) +{ + %% write eof; + + if ( fsm->cs == fsm_error ) + return -1; + if ( fsm->cs >= fsm_first_final ) + return 1; + return 0; +} + +int main() +{ + static struct fsm fsm; + static struct LangEl lel[] = { + {1, "one"}, + {2, "two-a"}, + {2, "two-b"}, + {2, "two-c"}, + {3, "three"} + }; + + fsm_init( &fsm ); + fsm_execute( &fsm, lel, 5 ); + fsm_finish( &fsm ); + + return 0; +} + +#ifdef _____OUTPUT_____ +one +two-a +two-b +two-c +three +accept +#endif diff --git a/test/element3.rl b/test/element3.rl new file mode 100644 index 0000000..773e801 --- /dev/null +++ b/test/element3.rl @@ -0,0 +1,144 @@ +/* + * @LANG: obj-c + */ + +#include <stdio.h> +#include <objc/Object.h> + +struct LangEl +{ + int key; + char *name; +}; + +@interface Fsm : Object +{ +@public + int cs; +}; + +// Initialize the machine. Invokes any init statement blocks. Returns 0 +// if the machine begins in a non-accepting state and 1 if the machine +// begins in an accepting state. +- (int) initFsm; + +// Execute the machine on a block of data. Returns -1 if after processing +// the data, the machine is in the error state and can never accept, 0 if +// the machine is in a non-accepting state and 1 if the machine is in an +// accepting state. +- (int) executeWithData:( struct LangEl *)data len:(int)len; + +// Indicate that there is no more data. Returns -1 if the machine finishes +// in the error state and does not accept, 0 if the machine finishes +// in any other non-accepting state and 1 if the machine finishes in an +// accepting state. 
+- (int) finish; + +@end; + + +@implementation Fsm + +%%{ + machine Fsm; + + alphtype int; + getkey fpc->key; + + action a1 {} + action a2 {} + action a3 {} + + main := ( 1 2* 3 ) + ${printf("%s\n", fpc->name);} + %/{printf("accept\n");}; +}%% + +%% write data; + +- (int) initFsm; +{ + %% write init; + return 0; +} + +- (int) executeWithData:( struct LangEl *)_data len:(int)_len; +{ + struct LangEl *p = _data; + struct LangEl *pe = _data + _len; + %% write exec; + + if ( self->cs == Fsm_error ) + return -1; + return ( self->cs >= Fsm_first_final ) ? 1 : 0; +} + +- (int) finish; +{ + %% write eof; + if ( self->cs == Fsm_error ) + return -1; + return ( self->cs >= Fsm_first_final ) ? 1 : 0; +} + + +@end + +int main() +{ + static Fsm *fsm; + static struct LangEl lel[] = { + {1, "one"}, + {2, "two-a"}, + {2, "two-b"}, + {2, "two-c"}, + {3, "three"} + }; + + fsm = [[Fsm alloc] init]; + [fsm initFsm]; + [fsm executeWithData:lel len:5]; + [fsm finish]; + + return 0; +} + +@interface Fsm2 : Object +{ + // The current state may be read and written to from outside of the + // machine. From within action code, curs is -1 and writing to it has no + // effect. + @public + int cs; + + @protected + +} + +// Execute the machine on a block of data. Returns -1 if after processing +// the data, the machine is in the error state and can never accept, 0 if +// the machine is in a non-accepting state and 1 if the machine is in an +// accepting state. 
+- (int) +executeWithElements:(int) elements +length:(unsigned)length; + +@end + +@implementation Fsm2 +- (int) +executeWithElements:(int)elements +length:(unsigned)length; +{ + return 0; +} +@end + +#ifdef _____OUTPUT_____ +one +two-a +two-b +two-c +three +accept +#endif diff --git a/test/eofact.h b/test/eofact.h new file mode 100644 index 0000000..d547f87 --- /dev/null +++ b/test/eofact.h @@ -0,0 +1,9 @@ +#ifndef _EOFACT_H +#define _EOFACT_H + +struct eofact +{ + int cs; +}; + +#endif diff --git a/test/eofact.rl b/test/eofact.rl new file mode 100644 index 0000000..890b73c --- /dev/null +++ b/test/eofact.rl @@ -0,0 +1,50 @@ +/* + * @LANG: indep + * @ALLOW_GENFLAGS: -T0 -T1 -F0 -F1 -G0 -G1 -G2 -P + */ +%% +%%{ + machine eofact; + + action a1 { prints "a1\n"; } + action a2 { prints "a2\n"; } + action a3 { prints "a3\n"; } + action a4 { prints "a4\n"; } + + + main := ( + 'hello' @eof a1 %eof a2 '\n'? | + 'there' @eof a3 %eof a4 + ); + +}%% +/* _____INPUT_____ +"" +"h" +"hell" +"hello" +"hello\n" +"t" +"ther" +"there" +"friend" +_____INPUT_____ */ +/* _____OUTPUT_____ +a1 +a3 +FAIL +a1 +FAIL +a1 +FAIL +a2 +ACCEPT +ACCEPT +a3 +FAIL +a3 +FAIL +a4 +ACCEPT +FAIL +_____OUTPUT_____ */ diff --git a/test/erract1.rl b/test/erract1.rl new file mode 100644 index 0000000..ee0237d --- /dev/null +++ b/test/erract1.rl @@ -0,0 +1,145 @@ +/* + * @LANG: c++ + */ + +/* + * Test error actions. + */ + +#include <iostream> +#include <stdio.h> +#include <string.h> + +using namespace std; + +struct ErrAct +{ + int cs; + + // Initialize the machine. Invokes any init statement blocks. Returns 0 + // if the machine begins in a non-accepting state and 1 if the machine + // begins in an accepting state. + int init( ); + + // Execute the machine on a block of data. Returns -1 if after processing + // the data, the machine is in the error state and can never accept, 0 if + // the machine is in a non-accepting state and 1 if the machine is in an + // accepting state. 
+ int execute( const char *data, int len ); + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ); +}; + +%%{ + machine ErrAct; + + action expect_digit_plus_minus { printf(" DIGIT PLUS MINUS\n"); } + action expect_digit { printf(" DIGIT\n"); } + action expect_digit_decimal { printf(" DIGIT DECIMAL\n"); } + + float = ( + ( + [\-+] >err expect_digit_plus_minus %err expect_digit | + "" + ) + ( [0-9] [0-9]* $err expect_digit_decimal ) + ( '.' [0-9]+ $err expect_digit )? + ); + + main := float '\n'; +}%% + +%% write data; + +int ErrAct::init( ) +{ + %% write init; + return 0; +} + +int ErrAct::execute( const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + %% write exec; + + if ( cs == ErrAct_error ) + return -1; + if ( cs >= ErrAct_first_final ) + return 1; + return 0; +} + +int ErrAct::finish( ) +{ + %% write eof; + if ( cs == ErrAct_error ) + return -1; + if ( cs >= ErrAct_first_final ) + return 1; + return 0; +} + +#define BUFSIZE 1024 + +void test( char *buf ) +{ + ErrAct errAct; + errAct.init(); + errAct.execute( buf, strlen(buf) ); + if ( errAct.finish() > 0 ) + cout << "ACCEPT" << endl; + else + cout << "FAIL" << endl; +} + +int main() +{ + test( "1\n" ); + test( "+1\n" ); + test( "-1\n" ); + test( "1.1\n" ); + test( "+1.1\n" ); + test( "-1.1\n" ); + test( "a\n" ); + test( "-\n" ); + test( "+\n" ); + test( "-a\n" ); + test( "+b\n" ); + test( "1.\n" ); + test( "1d\n" ); + test( "1.d\n" ); + test( "1.1d\n" ); + return 0; +} + +#ifdef _____OUTPUT_____ +ACCEPT +ACCEPT +ACCEPT +ACCEPT +ACCEPT +ACCEPT + DIGIT PLUS MINUS +FAIL + DIGIT +FAIL + DIGIT +FAIL + DIGIT +FAIL + DIGIT +FAIL + DIGIT +FAIL + DIGIT DECIMAL +FAIL + DIGIT +FAIL + DIGIT +FAIL +#endif diff --git a/test/erract2.rl b/test/erract2.rl new file mode 100644 index 
0000000..a4d0ef9 --- /dev/null +++ b/test/erract2.rl @@ -0,0 +1,80 @@ +/* + * @LANG: indep + * + * Test error actions. + */ +%% +%%{ + machine ErrAct; + + action err_start { prints "err_start\n"; } + action err_all { prints "err_all\n"; } + action err_middle { prints "err_middle\n"; } + action err_out { prints "err_out\n"; } + + action eof_start { prints "eof_start\n"; } + action eof_all { prints "eof_all\n"; } + action eof_middle { prints "eof_middle\n"; } + action eof_out { prints "eof_out\n"; } + + main := ( 'hello' + >err err_start $err err_all <>err err_middle %err err_out + >eof eof_start $eof eof_all <>eof eof_middle %eof eof_out + ) '\n'; +}%% + +/* _____INPUT_____ +"" +"h" +"x" +"he" +"hx" +"hel" +"hex" +"hell" +"helx" +"hello" +"hellx" +"hello\n" +"hellox" +_____INPUT_____ */ + +/* _____OUTPUT_____ +eof_start +eof_all +FAIL +eof_all +eof_middle +FAIL +err_start +err_all +FAIL +eof_all +eof_middle +FAIL +err_all +err_middle +FAIL +eof_all +eof_middle +FAIL +err_all +err_middle +FAIL +eof_all +eof_middle +FAIL +err_all +err_middle +FAIL +eof_all +eof_out +FAIL +err_all +err_middle +FAIL +ACCEPT +err_all +err_out +FAIL +_____OUTPUT_____ */ diff --git a/test/erract3.rl b/test/erract3.rl new file mode 100644 index 0000000..5490b67 --- /dev/null +++ b/test/erract3.rl @@ -0,0 +1,105 @@ +/* + * @LANG: c + */ + +#include <stdio.h> +#define IDENT_BUFLEN 256 + +struct erract +{ + int cs; +}; + +%%{ + machine erract; + variable curstate fsm->cs; + + # The data that is to go into the fsm structure. 
+ action hello_fails { printf("hello fails\n");} + + newline = ( any | '\n' @{printf("newline\n");} )*; + hello = 'hello\n'* $lerr hello_fails @eof hello_fails; + main := newline | hello; +}%% + +%% write data; + +void erract_init( struct erract *fsm ) +{ + %% write init; +} + +void erract_execute( struct erract *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + %% write exec; +} + +int erract_finish( struct erract *fsm ) +{ + %% write eof; + + if ( fsm->cs == erract_error ) + return -1; + else if ( fsm->cs >= erract_first_final ) + return 1; + return 0; +} + +#include <stdio.h> +#include <string.h> + +struct erract fsm; + +void test( char *buf ) +{ + int len = strlen(buf); + erract_init( &fsm ); + erract_execute( &fsm, buf, len ); + if ( erract_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +int main() +{ + test( + "hello\n" + "hello\n" + "hello\n" + ); + + test( + "hello\n" + "hello\n" + "hello there\n" + ); + + test( + "hello\n" + "hello\n" + "he" ); + + test( "" ); + + return 0; +} + +#ifdef _____OUTPUT_____ +newline +newline +newline +ACCEPT +newline +newline +hello fails +newline +ACCEPT +newline +newline +hello fails +ACCEPT +ACCEPT +#endif diff --git a/test/erract4.rl b/test/erract4.rl new file mode 100644 index 0000000..1a753ef --- /dev/null +++ b/test/erract4.rl @@ -0,0 +1,135 @@ +/* + * @LANG: obj-c + */ + +#include <stdio.h> +#include <objc/Object.h> + +#define IDENT_BUFLEN 256 + +@interface ErrAct : Object +{ +@public + int cs; +}; + +// Initialize the machine. Invokes any init statement blocks. Returns 0 +// if the machine begins in a non-accepting state and 1 if the machine +// begins in an accepting state. +- (int) initFsm; + +// Execute the machine on a block of data. Returns -1 if after processing +// the data, the machine is in the error state and can never accept, 0 if +// the machine is in a non-accepting state and 1 if the machine is in an +// accepting state. 
+- (void) executeWithData:(const char *)data len:(int)len; + +// Indicate that there is no more data. Returns -1 if the machine finishes +// in the error state and does not accept, 0 if the machine finishes +// in any other non-accepting state and 1 if the machine finishes in an +// accepting state. +- (int) finish; + +@end + +@implementation ErrAct + +%%{ + machine ErrAct; + + # The data that is to go into the fsm structure. + action hello_fails { printf("hello fails\n");} + + newline = ( any | '\n' @{printf("newline\n");} )*; + hello = 'hello\n'* $^hello_fails @/hello_fails; + main := newline | hello; +}%% + +%% write data; + +- (int) initFsm; +{ + %% write init; + return 1; +} + +- (void) executeWithData:(const char *)_data len:(int)_len; +{ + const char *p = _data; + const char *pe = _data + _len; + %% write exec; +} + +- (int) finish; +{ + %% write eof; + if ( cs == ErrAct_error ) + return -1; + else if ( cs >= ErrAct_first_final ) + return 1; + return 0; +} + +@end + +#include <stdio.h> +#include <string.h> +#define BUFSIZE 2048 + +ErrAct *fsm; +char buf[BUFSIZE]; + +void test( char *buf ) +{ + int len = strlen(buf); + fsm = [[ErrAct alloc] init]; + + [fsm initFsm]; + [fsm executeWithData:buf len:len]; + if ( [fsm finish] > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + + +int main() +{ + test( + "hello\n" + "hello\n" + "hello\n" + ); + + test( + "hello\n" + "hello\n" + "hello there\n" + ); + + test( + "hello\n" + "hello\n" + "he" ); + + test( "" ); + + return 0; +} + +#ifdef _____OUTPUT_____ +newline +newline +newline +ACCEPT +newline +newline +hello fails +newline +ACCEPT +newline +newline +hello fails +ACCEPT +ACCEPT +#endif diff --git a/test/erract5.rl b/test/erract5.rl new file mode 100644 index 0000000..73edec8 --- /dev/null +++ b/test/erract5.rl @@ -0,0 +1,146 @@ +/* + * @LANG: obj-c + */ + +/* + * Test error actions. 
+ */ + +#include <stdio.h> +#include <string.h> +#include <objc/Object.h> + + +@interface ErrAct : Object +{ +@public + int cs; +}; + +// Initialize the machine. Invokes any init statement blocks. Returns 0 +// if the machine begins in a non-accepting state and 1 if the machine +// begins in an accepting state. +- (int) initFsm; + +// Execute the machine on a block of data. Returns -1 if after processing +// the data, the machine is in the error state and can never accept, 0 if +// the machine is in a non-accepting state and 1 if the machine is in an +// accepting state. +- (void) executeWithData:(const char *)data len:(int)len; + +// Indicate that there is no more data. Returns -1 if the machine finishes +// in the error state and does not accept, 0 if the machine finishes +// in any other non-accepting state and 1 if the machine finishes in an +// accepting state. +- (int) finish; + +@end + +@implementation ErrAct + +%%{ + machine ErrAct; + + action expect_digit_plus_minus { printf(" DIGIT PLUS MINUS\n"); } + action expect_digit { printf(" DIGIT\n"); } + action expect_digit_decimal { printf(" DIGIT DECIMAL\n"); } + + float = ( + ( + [\-+] >!expect_digit_plus_minus %!expect_digit | + "" + ) + ( [0-9] [0-9]* $!expect_digit_decimal ) + ( '.' [0-9]+ $!expect_digit )? 
+ ); + + main := float '\n'; +}%% + +%% write data; + +- (int) initFsm; +{ + %% write init; + return 1; +} + +- (void) executeWithData:(const char *)_data len:(int)_len; +{ + const char *p = _data; + const char *pe = _data + _len; + %% write exec; +} + +- (int) finish; +{ + %% write eof; + if ( cs == ErrAct_error ) + return -1; + else if ( cs >= ErrAct_first_final ) + return 1; + return 0; +} + + +@end + +#define BUFSIZE 1024 + +void test( char *buf ) +{ + ErrAct *errAct = [[ErrAct alloc] init]; + [errAct initFsm]; + [errAct executeWithData:buf len:strlen(buf)]; + if ( [errAct finish] > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +int main() +{ + test( "1\n" ); + test( "+1\n" ); + test( "-1\n" ); + test( "1.1\n" ); + test( "+1.1\n" ); + test( "-1.1\n" ); + test( "a\n" ); + test( "-\n" ); + test( "+\n" ); + test( "-a\n" ); + test( "+b\n" ); + test( "1.\n" ); + test( "1d\n" ); + test( "1.d\n" ); + test( "1.1d\n" ); + return 0; +} + +#ifdef _____OUTPUT_____ +ACCEPT +ACCEPT +ACCEPT +ACCEPT +ACCEPT +ACCEPT + DIGIT PLUS MINUS +FAIL + DIGIT +FAIL + DIGIT +FAIL + DIGIT +FAIL + DIGIT +FAIL + DIGIT +FAIL + DIGIT DECIMAL +FAIL + DIGIT +FAIL + DIGIT +FAIL +#endif diff --git a/test/errintrans.rl b/test/errintrans.rl new file mode 100644 index 0000000..80b5a5a --- /dev/null +++ b/test/errintrans.rl @@ -0,0 +1,84 @@ +/* + * @LANG: c + */ + +/* + * Test of a transition going to the error state. 
+ */ + +#include <stdio.h> +#define BUFSIZE 2048 + +struct errintrans +{ + int cs; +}; + +%%{ + machine errintrans; + variable curstate fsm->cs; + + char = any - (digit | '\n'); + line = char* "\n"; + main := line+; +}%% + +%% write data; + +void errintrans_init( struct errintrans *fsm ) +{ + %% write init; +} + +void errintrans_execute( struct errintrans *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + + %% write exec; +} + +int errintrans_finish( struct errintrans *fsm ) +{ + %% write eof; + + if ( fsm->cs == errintrans_error ) + return -1; + if ( fsm->cs >= errintrans_first_final ) + return 1; + return 0; +} + + +struct errintrans fsm; +#include <string.h> + +void test( char *buf ) +{ + int len = strlen( buf ); + errintrans_init( &fsm ); + errintrans_execute( &fsm, buf, len ); + if ( errintrans_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + + +int main() +{ + test( + "good, does not have numbers\n" + ); + + test( + "bad, has numbers 666\n" + ); + + return 0; +} + +#ifdef _____OUTPUT_____ +ACCEPT +FAIL +#endif diff --git a/test/forder1.rl b/test/forder1.rl new file mode 100644 index 0000000..a6366cb --- /dev/null +++ b/test/forder1.rl @@ -0,0 +1,100 @@ +/* + * @LANG: c + */ + +#include <stdio.h> +#include <string.h> + +struct forder +{ + int cs; +}; + +%%{ + machine forder; + variable curstate fsm->cs; + + second = 'b' + >{printf("enter b1\n");} + >{printf("enter b2\n");} + ; + + first = 'a' + %{printf("leave a\n");} + @{printf("finish a\n");} + ; + + main := first . second . 
'\n'; +}%% + +%% write data; + +void forder_init( struct forder *fsm ) +{ + %% write init; +} + +void forder_execute( struct forder *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + + %% write exec; +} + +int forder_finish( struct forder *fsm ) +{ + %% write eof; + + if ( fsm->cs == forder_error ) + return -1; + if ( fsm->cs >= forder_first_final ) + return 1; + return 0; +} + +struct forder fsm; + +void test( char *buf ) +{ + int len = strlen(buf); + forder_init( &fsm ); + forder_execute( &fsm, buf, len ); + if ( forder_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +int main() +{ + test( "ab\n"); + test( "abx\n"); + test( "" ); + + test( + "ab\n" + "fail after newline\n" + ); + + return 0; +} + +#ifdef _____OUTPUT_____ +finish a +leave a +enter b1 +enter b2 +ACCEPT +finish a +leave a +enter b1 +enter b2 +FAIL +FAIL +finish a +leave a +enter b1 +enter b2 +FAIL +#endif diff --git a/test/forder2.rl b/test/forder2.rl new file mode 100644 index 0000000..9592179 --- /dev/null +++ b/test/forder2.rl @@ -0,0 +1,135 @@ +/* + * @LANG: c + */ + +#include <stdio.h> +#include <string.h> + +/* + * After the fact start and ending transitions. Behaves like constructors + * and destructors in C++. + */ + +struct forder +{ + int cs; +}; + +%%{ + machine forder; + variable curstate fsm->cs; + + inner = 'inner' + >{printf("enter inner\n");} + ${printf("inside inner\n");} + %{printf("leave inner\n");} + ; + + outter = inner + >{printf("enter outter\n");} + ${printf("inside outter\n");} + %{printf("leave outter\n");} + ; + + main := outter . 
'\n'; +}%% + +%% write data; + +void forder_init( struct forder *fsm ) +{ + %% write init; +} + +void forder_execute( struct forder *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + + %% write exec; +} + +int forder_finish( struct forder *fsm ) +{ + %% write eof; + + if ( fsm->cs == forder_error ) + return -1; + if ( fsm->cs >= forder_first_final ) + return 1; + return 0; +} + +struct forder fsm; + +void test( char *buf ) +{ + int len = strlen( buf ); + forder_init( &fsm ); + forder_execute( &fsm, buf, len ); + if ( forder_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + + +int main() +{ + test( "inner\n"); + + test( + "inner\n" + "foobar\n" + ); + + test( "" ); + test( "\n" ); + test( "inn\n" ); + + return 0; +} + +#ifdef _____OUTPUT_____ +enter outter +enter inner +inside inner +inside outter +inside inner +inside outter +inside inner +inside outter +inside inner +inside outter +inside inner +inside outter +leave inner +leave outter +ACCEPT +enter outter +enter inner +inside inner +inside outter +inside inner +inside outter +inside inner +inside outter +inside inner +inside outter +inside inner +inside outter +leave inner +leave outter +FAIL +FAIL +FAIL +enter outter +enter inner +inside inner +inside outter +inside inner +inside outter +inside inner +inside outter +FAIL +#endif diff --git a/test/forder3.rl b/test/forder3.rl new file mode 100644 index 0000000..7a659bb --- /dev/null +++ b/test/forder3.rl @@ -0,0 +1,106 @@ +/* + * @LANG: c + */ + +#include <stdio.h> +#include <string.h> + +struct forder +{ + int cs; +}; + +%%{ + machine forder; + variable curstate fsm->cs; + + m1 = ( "" %{printf("enter m1 aa\n");} | + 'aa'* >{printf("enter m1 aa\n");} %{printf("leave m1 aa\n");} ) + 'b' @{printf("through m1 b\n");} . 'b'* . 'a'*; + + m2 = 'bbb'* 'aa'*; + + main := ( + m1 %{printf("accept m1\n");} | + "" %{printf("enter m2\n");} | + m2 >{printf("enter m2\n");} %{printf("accpet m2\n");} + ) . 
'\n'; +}%% + +%% write data; + +void forder_init( struct forder *fsm ) +{ + %% write init; +} + +void forder_execute( struct forder *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + + %% write exec; +} + +int forder_finish( struct forder *fsm ) +{ + %% write eof; + + if ( fsm->cs == forder_error ) + return -1; + if ( fsm->cs >= forder_first_final ) + return 1; + return 0; +} + +struct forder fsm; + +void test( char *buf ) +{ + int len = strlen( buf ); + forder_init( &fsm ); + forder_execute( &fsm, buf, len ); + if ( forder_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +int main() +{ + test( "aaaaaabbbaa\n" ); + test( "\n" ); + test( "bbbbbbaaaaaaa\n" ); + test( "bbbbbbaaaaaa\n" ); + test( "aaaaa\n" ); + + return 0; +} + +#ifdef _____OUTPUT_____ +enter m1 aa +enter m2 +leave m1 aa +through m1 b +accept m1 +ACCEPT +enter m2 +accpet m2 +ACCEPT +enter m1 aa +leave m1 aa +through m1 b +enter m2 +accept m1 +ACCEPT +enter m1 aa +leave m1 aa +through m1 b +enter m2 +accept m1 +accpet m2 +ACCEPT +enter m1 aa +enter m2 +FAIL +#endif diff --git a/test/gotocallret1.rl b/test/gotocallret1.rl new file mode 100644 index 0000000..54626dd --- /dev/null +++ b/test/gotocallret1.rl @@ -0,0 +1,113 @@ +/* + * @LANG: indep + */ + +/* + * Demonstrate the use of goto, call and return. This machine expects either a + * lower case char or a digit as a command then a space followed by the command + * arg. If the command is a char, then the arg must be a string of chars. + * If the command is a digit, then the arg must be a string of digits. This + * choice is determined by action code, rather than through transition + * destinations. + */ + +char comm; +int top; +int stack[32]; +%% +%%{ + machine GotoCallRet; + + # A reference to a state in an unused action caused a segfault in 5.8. 
*/ + action unusedAction { fentry(garble_line); } + + action err_garbling_line { prints "error: garbling line\n"; } + action goto_main { fgoto main; } + action recovery_failed { prints "error: failed to recover\n"; } + + # Error machine, consumes to end of + # line, then starts the main line over. + garble_line := ( (any-'\n')*'\n') + >err_garbling_line + @goto_main + $/recovery_failed; + + action hold_and_return {fhold; fret;} + + # Look for a string of alphas or of digits, + # on anything else, hold the character and return. + alp_comm := alpha+ $!hold_and_return; + dig_comm := digit+ $!hold_and_return; + + # Choose which machine to call into based on the command. + action comm_arg { + if ( comm >= 'a' ) + fcall alp_comm; + else + fcall dig_comm; + } + + # Specifies command string. Note that the arg is left out. + command = ( + [a-z0-9] @{comm = fc;} ' ' @comm_arg '\n' + ) @{prints "correct command\n";}; + + # Any number of commands. If there is an + # error anywhere, garble the line. 
+ main := command* $!{fhold;fgoto garble_line;}; +}%% +/* _____INPUT_____ +"lkajsdf\n" +"2134\n" +"(\n" +"\n" +"*234234()0909 092 -234aslkf09`1 11\n" +"1\n" +"909\n" +"1 a\n" +"11 1\n" +"a 1\n" +"aa a\n" +"1 1\n" +"1 123456\n" +"a a\n" +"a abcdef\n" +"h" +"a aa1" +_____INPUT_____ */ +/* _____OUTPUT_____ +error: garbling line +ACCEPT +error: garbling line +ACCEPT +error: garbling line +ACCEPT +error: garbling line +ACCEPT +error: garbling line +ACCEPT +error: garbling line +ACCEPT +error: garbling line +ACCEPT +error: garbling line +ACCEPT +error: garbling line +ACCEPT +error: garbling line +ACCEPT +error: garbling line +ACCEPT +correct command +ACCEPT +correct command +ACCEPT +correct command +ACCEPT +correct command +ACCEPT +FAIL +error: garbling line +error: failed to recover +FAIL +_____OUTPUT_____ */ diff --git a/test/gotocallret2.rl b/test/gotocallret2.rl new file mode 100644 index 0000000..4a3bc0e --- /dev/null +++ b/test/gotocallret2.rl @@ -0,0 +1,77 @@ +/* + * @LANG: indep + */ + +char comm; +int top; +int stack[32]; +ptr tokstart; +ptr tokend; +int act; +int val; +%% +%%{ + machine GotoCallRet; + + sp = ' '; + + handle := any @{ + prints "handle "; + fhold; + if ( val == 1 ) fnext *fentry(one); + if ( val == 2 ) fnext *fentry(two); + if ( val == 3 ) fnext main; + }; + + one := |* + '{' => { prints "{ "; fcall *fentry(one); }; + "[" => { prints "[ "; fcall *fentry(two); }; + "}" sp* => { prints "} "; fret; }; + [a-z]+ => { prints "word "; val = 1; fgoto *fentry(handle); }; + ' ' => { prints "space "; }; + *|; + + two := |* + '{' => { prints "{ "; fcall *fentry(one); }; + "[" => { prints "[ "; fcall *fentry(two); }; + ']' sp* => { prints "] "; fret; }; + [a-z]+ => { prints "word "; val = 2; fgoto *fentry(handle); }; + ' ' => { prints "space "; }; + *|; + + main := |* + '{' => { prints "{ "; fcall one; }; + "[" => { prints "[ "; fcall two; }; + [a-z]+ => { prints "word "; val = 3; fgoto handle; }; + [a-z] ' foil' => { prints "this is the foil";}; + ' ' => { 
prints "space "; }; + '\n'; + *|; +}%% +/* _____INPUT_____ +"{a{b[c d]d}c}\n" +"[a{b[c d]d}c}\n" +"[a[b]c]d{ef{g{h}i}j}l\n" +"{{[]}}\n" +"a b c\n" +"{a b c}\n" +"[a b c]\n" +"{]\n" +"{{}\n" +"[[[[[[]]]]]]\n" +"[[[[[[]]}]]]\n" +_____INPUT_____ */ +/* _____OUTPUT_____ +{ word handle { word handle [ word handle space word handle ] word handle } word handle } ACCEPT +[ word handle { word handle [ word handle space word handle ] word handle } word handle FAIL +[ word handle [ word handle ] word handle ] word handle { word handle { word handle { word handle } word handle } word handle } word handle ACCEPT +{ { [ ] } } ACCEPT +word handle space word handle space word handle ACCEPT +{ word handle space word handle space word handle } ACCEPT +[ word handle space word handle space word handle ] ACCEPT +{ FAIL +{ { } FAIL +[ [ [ [ [ [ ] ] ] ] ] ] ACCEPT +[ [ [ [ [ [ ] ] FAIL +_____OUTPUT_____ */ + diff --git a/test/high1.rl b/test/high1.rl new file mode 100644 index 0000000..2ad1b60 --- /dev/null +++ b/test/high1.rl @@ -0,0 +1,183 @@ +/* + * @LANG: c + * @ALLOW_GENFLAGS: -T0 -T1 -G0 -G1 -G2 + */ + +/** + * Test a high character to make sure signedness + * isn't messing us up. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +struct high +{ + int cs; +}; + +%%{ + machine high; + variable curstate fsm->cs; + + # We Want the header portion. + alphtype unsigned int; + + main := ( + 0x20 .. 0xefffffff @1 @{printf("gothigh1\n");} | + 0xf0000000 @1 @{printf("gothigh1\n");} | + 0x200 .. 
0xfe000000 @1 @{printf("gothigh2\n");} | + any @0 @{printf("else\n");} + )*; +}%% + +%% write data; + +void high_init( struct high *fsm ) +{ + %% write init; +} + +void high_execute( struct high *fsm, const unsigned int *_data, int _len ) +{ + const unsigned int *p = _data; + const unsigned int *pe = _data+_len; + + %% write exec; +} + +int high_finish( struct high *fsm ) +{ + %% write eof; + + if ( fsm->cs == high_error ) + return -1; + if ( fsm->cs >= high_first_final ) + return 1; + return 0; +} + +struct high high; + +#define BUFSIZE 1024 +char cbuf[BUFSIZE]; +unsigned int buf[BUFSIZE]; +int buflen = 0; +char numbuf[9]; +int numlen = 0; + +struct tokenizer +{ + int cs; +}; + +%%{ + machine tokenizer; + variable curstate fsm->cs; + + action bufdigit { + if ( numlen < 8 ) + numbuf[numlen++] = fc; + } + + action writeDigit { + /* Null terminate the buffer storing the number and reset. */ + numbuf[numlen] = 0; + numlen = 0; + + /* Store the number in the buf. If the buf is full then + * flush and reset the buffer. 
*/ + buf[buflen++] = strtoul( numbuf, 0, 16 ); + if ( buflen == BUFSIZE ) { + high_execute( &high, buf, BUFSIZE ); + buflen = 0; + } + } + + action finish { + if ( buflen > 0 ) + high_execute( &high, buf, buflen ); + if ( high_finish( &high ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); + } + + num = ( digit | 'a'..'f' )+ $bufdigit %writeDigit; + main := ( num $1 %0 | space )* %/finish; +}%% + +%% write data; + +void tokenizer_init( struct tokenizer *fsm ) +{ + %% write init; +} + +void tokenizer_execute( struct tokenizer *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + + %% write exec; +} + +int tokenizer_finish( struct tokenizer *fsm ) +{ + %% write eof; + + if ( fsm->cs == tokenizer_error ) + return -1; + if ( fsm->cs >= tokenizer_first_final ) + return 1; + return 0; +} + +struct tokenizer tok; + +void test( char *cbuf ) +{ + int len = strlen( cbuf ); + high_init( &high ); + tokenizer_init( &tok ); + tokenizer_execute( &tok, cbuf, len ); + if ( tokenizer_finish( &tok ) <= 0 ) + printf("Tokenizer FAIL\n"); +} + +char data[] = + "10 20 30 40 50 200 300 400 \n" + "d0000000 f0000000 fd000000 fe000000\n" + "ff000000 ffffffffffffffffffffffffff\n" + "ff\n"; + +int main() +{ + test( data ); + return 0; +} + +#ifdef _____OUTPUT_____ +else +gothigh1 +gothigh1 +gothigh1 +gothigh1 +gothigh1 +gothigh2 +gothigh1 +gothigh2 +gothigh1 +gothigh2 +gothigh1 +gothigh2 +gothigh1 +gothigh2 +gothigh2 +gothigh2 +else +else +gothigh1 +ACCEPT +#endif diff --git a/test/high2.rl b/test/high2.rl new file mode 100644 index 0000000..1aeb9b3 --- /dev/null +++ b/test/high2.rl @@ -0,0 +1,104 @@ +/* + * @LANG: c++ + */ + +/** + * Test a high character to make sure signedness + * isn't messing us up. + */ + +#include <stdio.h> +#include <string.h> + +struct Fsm +{ + int cs; + + // Initialize the machine. Invokes any init statement blocks. 
Returns 0 + // if the machine begins in a non-accepting state and 1 if the machine + // begins in an accepting state. + int init( ); + + // Execute the machine on a block of data. Returns -1 if after processing + // the data, the machine is in the error state and can never accept, 0 if + // the machine is in a non-accepting state and 1 if the machine is in an + // accepting state. + int execute( const unsigned char *data, int len ); + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ); +}; + +%%{ + machine Fsm; + + alphtype unsigned char; + + # Indicate we got the high character. + action gothigh { + printf("yes\n"); + } + + main := 0xe8 @gothigh '\n'; +}%% + +%% write data; + +int Fsm::init( ) +{ + %% write init; + return 0; +} + +int Fsm::execute( const unsigned char *_data, int _len ) +{ + const unsigned char *p = _data; + const unsigned char *pe = _data+_len; + %% write exec; + if ( cs == Fsm_error ) + return -1; + if ( cs >= Fsm_first_final ) + return 1; + return 0; +} + +int Fsm::finish() +{ + %% write eof; + if ( cs == Fsm_error ) + return -1; + if ( cs >= Fsm_first_final ) + return 1; + return 0; +} + +Fsm fsm; + +void test( unsigned char *buf, int len ) +{ + fsm.init(); + fsm.execute( buf, len ); + if ( fsm.finish() > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +unsigned char data1[] = { 0xe8, 10 }; +unsigned char data2[] = { 0xf8, 10 }; + +int main() +{ + test( data1, 2 ); + test( data2, 2 ); + return 0; +} + +#ifdef _____OUTPUT_____ +yes +ACCEPT +FAIL +#endif diff --git a/test/high3.rl b/test/high3.rl new file mode 100644 index 0000000..03d2a74 --- /dev/null +++ b/test/high3.rl @@ -0,0 +1,112 @@ +/* + * @LANG: obj-c + */ + +/** + * Test a high character to make sure signedness + * isn't messing us up. 
+ */ + +#include <stdio.h> +#include <objc/Object.h> + +@interface Fsm : Object +{ +@public + int cs; +}; + +// Initialize the machine. Invokes any init statement blocks. Returns 0 +// if the machine begins in a non-accepting state and 1 if the machine +// begins in an accepting state. +- (int) initFsm; + +// Execute the machine on a block of data. Returns -1 if after processing +// the data, the machine is in the error state and can never accept, 0 if +// the machine is in a non-accepting state and 1 if the machine is in an +// accepting state. +- (void) executeWithData:(const unsigned char *)data len:(int)len; + +// Indicate that there is no more data. Returns -1 if the machine finishes +// in the error state and does not accept, 0 if the machine finishes +// in any other non-accepting state and 1 if the machine finishes in an +// accepting state. +- (int) finish; + +@end + +@implementation Fsm + +%%{ + machine Fsm; + + alphtype unsigned char; + + # Indicate we got the high character. 
+ action gothigh { + printf("yes\n"); + } + + main := 0xe8 @gothigh '\n'; +}%% + +%% write data; + +- (int) initFsm; +{ + %% write init; + return 1; +} + +- (void) executeWithData:(const unsigned char *)_data len:(int)_len; +{ + const unsigned char *p = _data; + const unsigned char *pe = _data + _len; + %% write exec; +} + +- (int) finish; +{ + %% write eof; + if ( cs == Fsm_error ) + return -1; + else if ( cs >= Fsm_first_final ) + return 1; + return 0; +} + + +@end + + +#define BUFSIZE 2048 + +Fsm *fsm; +unsigned char buf[BUFSIZE]; + +void test( unsigned char *buf, int len ) +{ + fsm = [[Fsm alloc] init]; + [fsm initFsm]; + [fsm executeWithData:buf len:len]; + if ( [fsm finish] > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +unsigned char data1[] = { 0xe8, 10 }; +unsigned char data2[] = { 0xf8, 10 }; + +int main() +{ + test( data1, 2 ); + test( data2, 2 ); + return 0; +} + +#ifdef _____OUTPUT_____ +yes +ACCEPT +FAIL +#endif diff --git a/test/include1.rl b/test/include1.rl new file mode 100644 index 0000000..30145de --- /dev/null +++ b/test/include1.rl @@ -0,0 +1,28 @@ +/* + * @LANG: c + * @IGNORE: yes + * + * Provides definitions for include tests. 
+ */ + +%%{ + machine include_test_1; + + action A {printf(" a1");} + action B {printf(" b1");} + + action NonRef1 {printf(" nr1");} + + a1 = 'a' @A; + b1 = 'b' @B; +}%% + +%%{ + machine include_test_2; + + action NonRef2 {printf(" nr2");} + + a2 = 'a' @{printf(" a2");}; + b2 = 'b' @{printf(" b2");}; +}%% + diff --git a/test/include2.rl b/test/include2.rl new file mode 100644 index 0000000..68ab007 --- /dev/null +++ b/test/include2.rl @@ -0,0 +1,52 @@ +/* + * @LANG: c + */ + +#include <stdio.h> +#include <string.h> + +%%{ + machine include_test_4; + + action NonRef3 {printf(" nr3");} + + a3 = 'a'@{printf(" a3");}; + b3 = 'b'@{printf(" b3");}; + +}%% + +%%{ + machine include_test_1; + + include "include1.rl"; + + include include_test_2 "include1.rl"; + + include include_test_4; + + main := + a1 b1 @NonRef1 + a2 b2 @NonRef2 + a3 b3 @NonRef3 + 0 @{fbreak;}; +}%% + +%% write data; + +void test( char *p ) +{ + int cs; + %% write init; + %% write exec noend; + printf("\n"); +} + +int main() +{ + test( "ababab" ); + return 0; +} + +#ifdef _____OUTPUT_____ + a1 b1 nr1 a2 b2 nr2 a3 b3 nr3 +#endif diff --git a/test/java1.rl b/test/java1.rl new file mode 100644 index 0000000..128386f --- /dev/null +++ b/test/java1.rl @@ -0,0 +1,49 @@ +/* + * @LANG: java + * @ALLOW_GENFLAGS: -T0 + */ + +class java1 +{ + %%{ + machine java1; + + one := 'one\n'; + two := 'two\n'; + four := 'four\n'; + + main := + ( 'hello' | 'there' | 'friend' ) + '\n' @{int s = fentry(one); fgoto *s; char c = fc;} + ( 'one' | 'two' | 'four' ) '\n'; + }%% + + %% write data; + + static void test( char data[] ) + { + int cs, p = 0, pe = data.length; + int top; + + %% write init; + %% write exec; + + if ( cs >= java1_first_final ) + System.out.println( "ACCEPT" ); + else + System.out.println( "FAIL" ); + } + + public static void main( String args[] ) + { + test( "hello\none\n".toCharArray() ); + test( "there\ntwo\n".toCharArray() ); + test( "friend\nfour\n".toCharArray() ); + } +} + +/* _____OUTPUT_____ +ACCEPT 
+FAIL +FAIL +*/ diff --git a/test/java2.rl b/test/java2.rl new file mode 100644 index 0000000..61d9ac9 --- /dev/null +++ b/test/java2.rl @@ -0,0 +1,51 @@ +/* + * @LANG: java + * @ALLOW_GENFLAGS: -T0 + */ + +class java2 +{ + %%{ + machine java1; + alphtype int; + + main := 1 2 3 4 ( + 5 6 7 8 | + 9 10 11 12 + ) 1073741824; + + }%% + + %% write data; + + static void test( int data[] ) + { + int cs, p = 0, pe = data.length; + int top; + + %% write init; + %% write exec; + + if ( cs >= java1_first_final ) + System.out.println( "ACCEPT" ); + else + System.out.println( "FAIL" ); + } + + static final int t1[] = { 1, 2, 3, 4, 5, 6, 7, 8, 1073741824 }; + static final int t2[] = { 1, 2, 3, 4, 9, 10, 11, 12, 1073741824 }; + static final int t3[] = { 1, 2, 3, 4, 1073741824 }; + + public static void main( String args[] ) + { + test( t1 ); + test( t2 ); + test( t3 ); + } +} + +/* _____OUTPUT_____ +ACCEPT +ACCEPT +FAIL +*/ diff --git a/test/keller1.rl b/test/keller1.rl new file mode 100644 index 0000000..94d25b7 --- /dev/null +++ b/test/keller1.rl @@ -0,0 +1,1076 @@ +/* + * @LANG: c++ + */ + +/* + * Automatically generated by keller. Do not edit. + * + * Parts of this file are copied from Keller source covered by the GNU + * GPL. As a special exception, you may use the parts of this file copied + * from Keller source without restriction. The remainder is derived from + * "tmp.gmr" and inherits the copyright status of that file. 
+ */ + +#line 1 "tmp.gmr" +#include <iostream> +using std::cout; +using std::endl; + + +#line 16 "tmp.rl" +enum token_type_e { + tt_id, + tt_equals, + tt_semi, + tt_pipe, + tt_amp, + tt_minus, + tt_dot, + tt_colon, + tt_percent, + tt_dollar, + tt_plus, + tt_number, + tt_star, + tt_question, + tt_not, + tt_andFSM, + tt_orFSM, + tt_open, + tt_close +}; + +struct LangEl +{ + int line, lineEnd; + int pos; + + int type; + int state; + LangEl *prev, *next; +}; + +struct Token : public LangEl +{ + const char *value; +}; + +struct Lel_start : public LangEl +{ +#line 32 "tmp.gmr" + + int si; +#line 59 "tmp.rl" +}; + +struct Lel_M : public LangEl +{ +#line 36 "tmp.gmr" + + int mi; +#line 67 "tmp.rl" +}; + +#define l__error 19 +#define l_tt_id 0 +#define l_tt_equals 1 +#define l_tt_semi 2 +#define l_tt_pipe 3 +#define l_tt_amp 4 +#define l_tt_minus 5 +#define l_tt_dot 6 +#define l_tt_colon 7 +#define l_tt_percent 8 +#define l_tt_dollar 9 +#define l_tt_plus 10 +#define l_tt_number 11 +#define l_tt_star 12 +#define l_tt_question 13 +#define l_tt_not 14 +#define l_tt_andFSM 15 +#define l_tt_orFSM 16 +#define l_tt_open 17 +#define l_tt_close 18 +#define l_start 23 +#define l_M 24 +#define l_A 25 +#define l_E 26 +#define l_T 27 +#define l_N 28 +#define l_K 29 +#define l_F 30 +#define l__start 31 +#define l__eof 20 + +struct LangEl; + +struct Parser +{ + Parser(); + + void parseLangEl( LangEl *langEl ); + int done( ); + + void push( LangEl *lel ) { + lel->prev = stack; + stack = lel; + } + LangEl *pop() { + LangEl *ret = stack; + stack = stack->prev; + return ret; + } + int pop( int n ); + void rem( LangEl *lel, int n ); + LangEl *stack; + int next; + LangEl *redLel; + LangEl *rhs[10]; + + int cs; + + // Initialize the machine. Invokes any init statement blocks. Returns 0 + // if the machine begins in a non-accepting state and 1 if the machine + // begins in an accepting state. + int init( ); + + // Execute the machine on a block of data. 
Returns -1 if after processing + // the data, the machine is in the error state and can never accept, 0 if + // the machine is in a non-accepting state and 1 if the machine is in an + // accepting state. + int execute( LangEl *data, int len ); + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ); +}; + + +%%{ + machine Parser; + + getkey fpc->type; + + action shift { + fpc->state = fcurs; + push( fpc ); + } + + action pop1 { fnext *pop(1); } + action pop2 { fnext *pop(2); } + action pop3 { fnext *pop(3); } + action pop4 { fnext *pop(4); } + + action new_error { + redLel = new LangEl(); + redLel->type = 19; + } + + action newstart { + redLel = new Lel_start(); + redLel->type = 23; + } + + action newM { + redLel = new Lel_M(); + redLel->type = 24; + } + + action newA { + redLel = new LangEl(); + redLel->type = 25; + } + + action newE { + redLel = new LangEl(); + redLel->type = 26; + } + + action newT { + redLel = new LangEl(); + redLel->type = 27; + } + + action newN { + redLel = new LangEl(); + redLel->type = 28; + } + + action newK { + redLel = new LangEl(); + redLel->type = 29; + } + + action newF { + redLel = new LangEl(); + redLel->type = 30; + } + + action new_eof { + redLel = new LangEl(); + redLel->type = 20; + } + + action new_epsilon { + redLel = new LangEl(); + redLel->type = 21; + } + + action new_null { + redLel = new LangEl(); + redLel->type = 22; + } + + action rem1 { rem(fpc, 1); } + action rem2 { rem(fpc, 2); } + action rem3 { rem(fpc, 3); } + action rem4 { rem(fpc, 4); } + + action r_start_0 + { +#line 41 "tmp.gmr" + + cout << "start = M;" << endl; + static_cast<Lel_start*>(redLel)->si = static_cast<Lel_M*>(rhs[0])->mi; + +#line 214 "tmp.rl" + } + + action r_M_0 + { +#line 44 "tmp.gmr" + cout << "M = M A;" << endl; +#line 221 "tmp.rl" + } + + 
action r_M_1 + { +#line 45 "tmp.gmr" + cout << "M = A;" << endl; +#line 228 "tmp.rl" + } + + action r_A_0 + { +#line 46 "tmp.gmr" + cout << "A = tt_id tt_equals E tt_semi;" << endl; +#line 235 "tmp.rl" + } + + action r_E_0 + { +#line 47 "tmp.gmr" + cout << "E = E tt_pipe T;" << endl; +#line 242 "tmp.rl" + } + + action r_E_1 + { +#line 48 "tmp.gmr" + cout << "E = E tt_amp T;" << endl; +#line 249 "tmp.rl" + } + + action r_E_2 + { +#line 49 "tmp.gmr" + cout << "E = E tt_minus T;" << endl; +#line 256 "tmp.rl" + } + + action r_E_3 + { +#line 50 "tmp.gmr" + cout << "E = T;" << endl; +#line 263 "tmp.rl" + } + + action r_T_0 + { +#line 51 "tmp.gmr" + cout << "T = T tt_dot N;" << endl; +#line 270 "tmp.rl" + } + + action r_T_1 + { +#line 52 "tmp.gmr" + cout << "T = T N;" << endl; +#line 277 "tmp.rl" + } + + action r_T_2 + { +#line 53 "tmp.gmr" + cout << "T = N;" << endl; +#line 284 "tmp.rl" + } + + action r_N_0 + { +#line 54 "tmp.gmr" + cout << "N = N tt_colon tt_id;" << endl; +#line 291 "tmp.rl" + } + + action r_N_1 + { +#line 55 "tmp.gmr" + cout << "N = N tt_percent tt_id;" << endl; +#line 298 "tmp.rl" + } + + action r_N_2 + { +#line 56 "tmp.gmr" + cout << "N = N tt_dollar tt_id;" << endl; +#line 305 "tmp.rl" + } + + action r_N_3 + { +#line 57 "tmp.gmr" + cout << "N = N tt_colon tt_plus tt_number;" << endl; +#line 312 "tmp.rl" + } + + action r_N_4 + { +#line 58 "tmp.gmr" + cout << "N = N tt_colon tt_minus tt_number;" << endl; +#line 319 "tmp.rl" + } + + action r_N_5 + { +#line 59 "tmp.gmr" + cout << "N = N tt_percent tt_plus tt_number;" << endl; +#line 326 "tmp.rl" + } + + action r_N_6 + { +#line 60 "tmp.gmr" + cout << "N = N tt_percent tt_minus tt_number;" << endl; +#line 333 "tmp.rl" + } + + action r_N_7 + { +#line 61 "tmp.gmr" + cout << "N = N tt_dollar tt_plus tt_number;" << endl; +#line 340 "tmp.rl" + } + + action r_N_8 + { +#line 62 "tmp.gmr" + cout << "N = N tt_dollar tt_minus tt_number;" << endl; +#line 347 "tmp.rl" + } + + action r_N_9 + { +#line 63 "tmp.gmr" + 
cout << "N = K;" << endl; +#line 354 "tmp.rl" + } + + action r_K_0 + { +#line 64 "tmp.gmr" + cout << "K = F tt_star;" << endl; +#line 361 "tmp.rl" + } + + action r_K_1 + { +#line 65 "tmp.gmr" + cout << "K = F tt_question;" << endl; +#line 368 "tmp.rl" + } + + action r_K_2 + { +#line 66 "tmp.gmr" + cout << "K = F tt_plus;" << endl; +#line 375 "tmp.rl" + } + + action r_K_3 + { +#line 67 "tmp.gmr" + cout << "K = F;" << endl; +#line 382 "tmp.rl" + } + + action r_K_4 + { +#line 68 "tmp.gmr" + cout << "K = tt_not F tt_star;" << endl; +#line 389 "tmp.rl" + } + + action r_K_5 + { +#line 69 "tmp.gmr" + cout << "K = tt_not F tt_question;" << endl; +#line 396 "tmp.rl" + } + + action r_K_6 + { +#line 70 "tmp.gmr" + cout << "K = tt_not F tt_plus;" << endl; +#line 403 "tmp.rl" + } + + action r_K_7 + { +#line 71 "tmp.gmr" + cout << "K = tt_not F;" << endl; +#line 410 "tmp.rl" + } + + action r_F_0 + { +#line 72 "tmp.gmr" + cout << "F = tt_andFSM;" << endl; +#line 417 "tmp.rl" + } + + action r_F_1 + { +#line 73 "tmp.gmr" + cout << "F = tt_orFSM;" << endl; +#line 424 "tmp.rl" + } + + action r_F_2 + { +#line 74 "tmp.gmr" + cout << "F = tt_id;" << endl; +#line 431 "tmp.rl" + } + + action r_F_3 + { +#line 75 "tmp.gmr" + cout << "F = tt_open E tt_close;" << endl; +#line 438 "tmp.rl" + } + + main := + s0: start: ( + 23 @shift -> s1 | + 25 @shift -> s3 | + 24 @shift -> s4 | + 0 @shift -> s5 + ), + s1: ( + 20 @shift -> s54 + ), + s2: ( + (0|20) @pop2 @newM @r_M_0 @rem2 -> s54 + ), + s3: ( + (0|20) @pop1 @newM @r_M_1 @rem1 -> s54 + ), + s4: ( + 20 @pop1 @newstart @r_start_0 @rem1 -> s54 | + 25 @shift -> s2 | + 0 @shift -> s5 + ), + s5: ( + 1 @shift -> s6 + ), + s6: ( + 26 @shift -> s8 | + 27 @shift -> s9 | + 29 @shift -> s25 | + 28 @shift -> s26 | + 30 @shift -> s33 | + 17 @shift -> s35 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 + ), + s7: ( + (0|20) @pop4 @newA @r_A_0 @rem4 -> s54 + ), + s8: ( + 2 @shift -> s7 | + 3 @shift -> s37 | + 4 @shift -> s38 
| + 5 @shift -> s39 + ), + s9: ( + (2..5|18) @pop1 @newE @r_E_3 @rem1 -> s54 | + 29 @shift -> s25 | + 30 @shift -> s33 | + 28 @shift -> s34 | + 17 @shift -> s35 | + 6 @shift -> s41 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 + ), + s10: ( + (0|2..9|14..18) @pop3 @newN @r_N_0 @rem3 -> s54 + ), + s11: ( + (0|2..9|14..18) @pop3 @newN @r_N_1 @rem3 -> s54 + ), + s12: ( + (0|2..9|14..18) @pop3 @newN @r_N_2 @rem3 -> s54 + ), + s13: ( + 11 @shift -> s14 + ), + s14: ( + (0|2..9|14..18) @pop4 @newN @r_N_3 @rem4 -> s54 + ), + s15: ( + 11 @shift -> s16 + ), + s16: ( + (0|2..9|14..18) @pop4 @newN @r_N_4 @rem4 -> s54 + ), + s17: ( + 11 @shift -> s18 + ), + s18: ( + (0|2..9|14..18) @pop4 @newN @r_N_5 @rem4 -> s54 + ), + s19: ( + 11 @shift -> s20 + ), + s20: ( + (0|2..9|14..18) @pop4 @newN @r_N_6 @rem4 -> s54 + ), + s21: ( + 11 @shift -> s22 + ), + s22: ( + (0|2..9|14..18) @pop4 @newN @r_N_7 @rem4 -> s54 + ), + s23: ( + 11 @shift -> s24 + ), + s24: ( + (0|2..9|14..18) @pop4 @newN @r_N_8 @rem4 -> s54 + ), + s25: ( + (0|2..9|14..18) @pop1 @newN @r_N_9 @rem1 -> s54 + ), + s26: ( + (0|2..6|14..18) @pop1 @newT @r_T_2 @rem1 -> s54 | + 7 @shift -> s27 | + 8 @shift -> s28 | + 9 @shift -> s29 + ), + s27: ( + 0 @shift -> s10 | + 10 @shift -> s13 | + 5 @shift -> s15 + ), + s28: ( + 0 @shift -> s11 | + 10 @shift -> s17 | + 5 @shift -> s19 + ), + s29: ( + 0 @shift -> s12 | + 10 @shift -> s21 | + 5 @shift -> s23 + ), + s30: ( + (0|2..9|14..18) @pop2 @newK @r_K_0 @rem2 -> s54 + ), + s31: ( + (0|2..9|14..18) @pop2 @newK @r_K_1 @rem2 -> s54 + ), + s32: ( + (0|2..9|14..18) @pop2 @newK @r_K_2 @rem2 -> s54 + ), + s33: ( + (0|2..9|14..18) @pop1 @newK @r_K_3 @rem1 -> s54 | + 12 @shift -> s30 | + 13 @shift -> s31 | + 10 @shift -> s32 + ), + s34: ( + (0|2..6|14..18) @pop2 @newT @r_T_1 @rem2 -> s54 | + 7 @shift -> s27 | + 8 @shift -> s28 | + 9 @shift -> s29 + ), + s35: ( + 27 @shift -> s9 | + 29 @shift -> s25 | + 28 @shift -> s26 | + 30 @shift -> s33 | + 17 @shift -> 
s35 | + 26 @shift -> s40 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 + ), + s36: ( + (0|2..10|12..18) @pop3 @newF @r_F_3 @rem3 -> s54 + ), + s37: ( + 29 @shift -> s25 | + 28 @shift -> s26 | + 30 @shift -> s33 | + 17 @shift -> s35 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 | + 27 @shift -> s53 + ), + s38: ( + 29 @shift -> s25 | + 28 @shift -> s26 | + 30 @shift -> s33 | + 17 @shift -> s35 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 | + 27 @shift -> s52 + ), + s39: ( + 29 @shift -> s25 | + 28 @shift -> s26 | + 30 @shift -> s33 | + 17 @shift -> s35 | + 27 @shift -> s42 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 + ), + s40: ( + 18 @shift -> s36 | + 3 @shift -> s37 | + 4 @shift -> s38 | + 5 @shift -> s39 + ), + s41: ( + 29 @shift -> s25 | + 30 @shift -> s33 | + 17 @shift -> s35 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 | + 28 @shift -> s51 + ), + s42: ( + (2..5|18) @pop3 @newE @r_E_2 @rem3 -> s54 | + 29 @shift -> s25 | + 30 @shift -> s33 | + 28 @shift -> s34 | + 17 @shift -> s35 | + 6 @shift -> s41 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 + ), + s43: ( + (0|2..9|14..18) @pop3 @newK @r_K_4 @rem3 -> s54 + ), + s44: ( + (0|2..9|14..18) @pop3 @newK @r_K_5 @rem3 -> s54 + ), + s45: ( + (0|2..9|14..18) @pop3 @newK @r_K_6 @rem3 -> s54 + ), + s46: ( + 17 @shift -> s35 | + 30 @shift -> s47 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 + ), + s47: ( + (0|2..9|14..18) @pop2 @newK @r_K_7 @rem2 -> s54 | + 12 @shift -> s43 | + 13 @shift -> s44 | + 10 @shift -> s45 + ), + s48: ( + (0|2..10|12..18) @pop1 @newF @r_F_0 @rem1 -> s54 + ), + s49: ( + (0|2..10|12..18) @pop1 @newF @r_F_1 @rem1 -> s54 + ), + s50: ( + (0|2..10|12..18) @pop1 @newF @r_F_2 @rem1 -> s54 + ), + s51: ( + (0|2..6|14..18) @pop3 @newT @r_T_0 @rem3 -> s54 | + 7 @shift -> 
s27 | + 8 @shift -> s28 | + 9 @shift -> s29 + ), + s52: ( + (2..5|18) @pop3 @newE @r_E_1 @rem3 -> s54 | + 29 @shift -> s25 | + 30 @shift -> s33 | + 28 @shift -> s34 | + 17 @shift -> s35 | + 6 @shift -> s41 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 + ), + s53: ( + (2..5|18) @pop3 @newE @r_E_0 @rem3 -> s54 | + 29 @shift -> s25 | + 30 @shift -> s33 | + 28 @shift -> s34 | + 17 @shift -> s35 | + 6 @shift -> s41 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 + ), + s54: ( + '' -> final + ) + ; +}%% + +%% write data; + +Parser::Parser( ) +{ } + +int Parser::init( ) +{ + %% write init; + return 0; +} + +int Parser::execute( LangEl *_data, int _len ) +{ + LangEl *p = _data; + LangEl *pe = _data+_len; + %% write exec; + if ( cs == Parser_error ) + return -1; + if ( cs >= Parser_first_final ) + return 1; + return 0; +} + +int Parser::finish( ) +{ + %% write eof; + if ( cs == Parser_error ) + return -1; + if ( cs >= Parser_first_final ) + return 1; + return 0; +} + +void Parser::parseLangEl( LangEl *lel ) +{ + redLel = 0; + execute( lel, 1 ); + while ( redLel != 0 ) { + execute( redLel, 1 ); + redLel = 0; + execute( lel, 1 ); + } +} + +int Parser::pop( int n ) +{ + for ( int i = n-1; i >= 0; i-- ) + rhs[i] = pop(); + return rhs[0]->state; +} + +void Parser::rem( LangEl *lel, int n ) +{ + for ( int i = n-1; i >= 0; i-- ) + delete rhs[i]; +} + +int Parser::done( ) +{ + Token *eof = new Token; + eof->type = l__eof; + eof->line = 0; + eof->pos = 0; + parseLangEl( eof ); + return finish(); +} + +#line 77 "tmp.gmr" + + +#include <assert.h> +#define MAX_TOKS 10000 + +struct TokList +{ + TokList() : numToks(0) { } + + void append( int type ); + int parse(); + + Token *toks[MAX_TOKS]; + int numToks; +}; + +void TokList::append( int type ) +{ + assert( numToks < MAX_TOKS ); + toks[numToks] = new Token; + toks[numToks]->type = type; + numToks += 1; +} + +int TokList::parse() +{ + Parser parser; + parser.init(); 
+ for ( int i = 0; i < numToks; i++ ) + parser.parseLangEl( toks[i] ); + return parser.done(); +} + +void test0() +{ + TokList tokList; + tokList.append( tt_id ); + tokList.append( tt_equals ); + tokList.append( tt_id ); + tokList.append( tt_star ); + tokList.append( tt_minus ); + tokList.append( tt_andFSM ); + tokList.append( tt_dot ); + tokList.append( tt_id ); + tokList.append( tt_semi ); + tokList.append( tt_id ); + tokList.append( tt_equals ); + tokList.append( tt_id ); + tokList.append( tt_andFSM ); + tokList.append( tt_id ); + tokList.append( tt_semi ); + cout << tokList.parse() << endl; +} + +void test1() +{ + TokList tokList; + tokList.append( tt_id ); + tokList.append( tt_equals ); + tokList.append( tt_open ); + tokList.append( tt_orFSM ); + tokList.append( tt_minus ); + tokList.append( tt_andFSM ); + tokList.append( tt_close ); + tokList.append( tt_star ); + tokList.append( tt_semi ); + cout << tokList.parse() << endl; +} +void test2() +{ + TokList tokList; + tokList.append( tt_id ); + tokList.append( tt_equals ); + tokList.append( tt_not ); + tokList.append( tt_open ); + tokList.append( tt_orFSM ); + tokList.append( tt_minus ); + tokList.append( tt_not ); + tokList.append( tt_andFSM ); + tokList.append( tt_close ); + tokList.append( tt_star ); + tokList.append( tt_semi ); + cout << tokList.parse() << endl; +} +void test3() +{ + TokList tokList; + tokList.append( tt_id ); + tokList.append( tt_equals ); + tokList.append( tt_id ); + tokList.append( tt_colon ); + tokList.append( tt_minus ); + tokList.append( tt_number ); + tokList.append( tt_id ); + tokList.append( tt_colon ); + tokList.append( tt_id ); + tokList.append( tt_id ); + tokList.append( tt_dollar ); + tokList.append( tt_plus ); + tokList.append( tt_number ); + tokList.append( tt_id ); + tokList.append( tt_percent ); + tokList.append( tt_minus ); + tokList.append( tt_number ); + tokList.append( tt_semi ); + cout << tokList.parse() << endl; +} +void test4() +{ + TokList tokList; + tokList.append( 
tt_id ); + tokList.append( tt_equals ); + tokList.append( tt_id ); + tokList.append( tt_pipe ); + tokList.append( tt_id ); + tokList.append( tt_amp ); + tokList.append( tt_id ); + tokList.append( tt_minus ); + tokList.append( tt_id ); + tokList.append( tt_semi ); + cout << tokList.parse() << endl; +} + +int main() +{ + test0(); + test1(); + test2(); + test3(); + test4(); +} + +#ifdef _____OUTPUT_____ +F = tt_id; +K = F tt_star; +N = K; +T = N; +E = T; +F = tt_andFSM; +K = F; +N = K; +T = N; +F = tt_id; +K = F; +N = K; +T = T tt_dot N; +E = E tt_minus T; +A = tt_id tt_equals E tt_semi; +M = A; +F = tt_id; +K = F; +N = K; +T = N; +F = tt_andFSM; +K = F; +N = K; +T = T N; +F = tt_id; +K = F; +N = K; +T = T N; +E = T; +A = tt_id tt_equals E tt_semi; +M = M A; +start = M; +1 +F = tt_orFSM; +K = F; +N = K; +T = N; +E = T; +F = tt_andFSM; +K = F; +N = K; +T = N; +E = E tt_minus T; +F = tt_open E tt_close; +K = F tt_star; +N = K; +T = N; +E = T; +A = tt_id tt_equals E tt_semi; +M = A; +start = M; +1 +F = tt_orFSM; +K = F; +N = K; +T = N; +E = T; +F = tt_andFSM; +K = tt_not F; +N = K; +T = N; +E = E tt_minus T; +F = tt_open E tt_close; +K = tt_not F tt_star; +N = K; +T = N; +E = T; +A = tt_id tt_equals E tt_semi; +M = A; +start = M; +1 +F = tt_id; +K = F; +N = K; +N = N tt_colon tt_minus tt_number; +T = N; +F = tt_id; +K = F; +N = K; +N = N tt_colon tt_id; +T = T N; +F = tt_id; +K = F; +N = K; +N = N tt_dollar tt_plus tt_number; +T = T N; +F = tt_id; +K = F; +N = K; +N = N tt_percent tt_minus tt_number; +T = T N; +E = T; +A = tt_id tt_equals E tt_semi; +M = A; +start = M; +1 +F = tt_id; +K = F; +N = K; +T = N; +E = T; +F = tt_id; +K = F; +N = K; +T = N; +E = E tt_pipe T; +F = tt_id; +K = F; +N = K; +T = N; +E = E tt_amp T; +F = tt_id; +K = F; +N = K; +T = N; +E = E tt_minus T; +A = tt_id tt_equals E tt_semi; +M = A; +start = M; +1 +#endif diff --git a/test/langtrans_c.sh b/test/langtrans_c.sh new file mode 100755 index 0000000..7d9cf41 --- /dev/null +++ 
b/test/langtrans_c.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# + +file=$1 + +[ -f $file ] || exit 1 + +# Get the machine name. +machine=`sed -n 's/^[\t ]*machine[\t ]*\([a-zA-Z_0-9]*\)[\t ]*;[\t ]*$/\1/p' $file` + +# Make a temporary version of the test case using the C language translations. +sed -n '/\/\*/,/\*\//d;p' $file | txl -q stdin langtrans_c.txl > $file.pr + +# Begin writing out the test case. +cat << EOF +/* + * @LANG: c + * @GENERATED: yes + */ +#include <string.h> +#include <stdio.h> +EOF + +# Write the data declarations +sed -n '/^%%$/q;p' $file.pr + +# Write out the machine specification. +sed -n '/^%%{$/,/^}%%/p' $file.pr + +# Write out the init and execute routines. +cat << EOF +int cs; +%% write data; +void init() +{ +EOF + +sed -n '1,/^%%$/d; /^%%{$/q; {s/^/\t/;p}' $file.pr + +cat << EOF + %% write init; +} + +void exec( char *data, int len ) +{ + char *p = data; + char *pe = data + len; + %% write exec; +} + +void finish( ) +{ + %% write eof; + if ( cs >= ${machine}_first_final ) + printf( "ACCEPT\\n" ); + else + printf( "FAIL\\n" ); +} +EOF + +# Write out the test data. +sed -n '1,/\/\* _____INPUT_____/d; /_____INPUT_____ \*\//q; p;' $file | awk ' +BEGIN { + print "char *inp[] = {" +} +{ + print " " $0 "," +} +END { + print "};" + print "" + print "int inplen = " NR ";" +}' + +# Write out the main routine. +cat << EOF + +int main( ) +{ + int i; + for ( i = 0; i < inplen; i++ ) { + init(); + exec( inp[i], strlen(inp[i]) ); + finish(); + } + return 0; +} +#ifdef _____OUTPUT_____ +EOF + +# Write out the expected output. +sed -n '1,/\/\* _____OUTPUT_____/d; /_____OUTPUT_____ \*\//q; p;' $file +echo "#endif" + +# Don't need this language-specific file anymore.
+rm $file.pr diff --git a/test/langtrans_c.txl b/test/langtrans_c.txl new file mode 100644 index 0000000..831350c --- /dev/null +++ b/test/langtrans_c.txl @@ -0,0 +1,277 @@ +include "testcase.txl" + +define c_statements + [repeat c_lang_stmt] +end define + +define c_lang_stmt + [al_ragel_stmt] + | [c_variable_decl] + | [c_expr_stmt] + | [c_if_stmt] + | [EX] '{ [IN] [NL] [c_statements] [EX] '} [IN] [NL] +end define + +define c_variable_decl + [c_type_decl] [id] [opt union] '; [NL] +end define + +define c_type_decl + [al_type_decl] + | 'char '* +end define + +define c_expr_stmt + [c_expr] '; [NL] +end define + +define c_expr + [c_term] [repeat c_expr_extend] +end define + +define c_expr_extend + [al_expr_op] [c_term] +end define + +define c_term + [al_term] + | [id] '( [c_args] ') +end define + +define c_args + [list c_expr] +end define + +define c_sign + '- | '+ +end define + +define c_if_stmt + 'if '( [c_expr] ') [NL] [IN] + [c_lang_stmt] [EX] + [opt c_else] +end define + +define c_else + 'else [NL] [IN] + [c_lang_stmt] [EX] +end define + +define c_lang + [c_statements] + '%% [NL] + [c_statements] + [ragel_def] +end define + +define program + [lang_indep] + | [c_lang] +end define + +redefine al_host_block + '{ [NL] [IN] [al_statements] [EX] '} [NL] + | '{ [NL] [IN] [c_statements] [EX] '} [NL] +end define + +rule boolTypes + replace [al_type_decl] + 'bool + by + 'int +end rule + +rule ptrTypes + replace [c_type_decl] + 'ptr + by + 'char '* +end rule + +rule boolVals1 + replace [al_term] + 'true + by + '1 +end rule + +rule boolVals2 + replace [al_term] + 'false + by + '0 +end rule + +function alStmtToC1 AlStmt [action_lang_stmt] + deconstruct AlStmt + VarDecl [al_variable_decl] + deconstruct VarDecl + Type [al_type_decl] Id [id] OptUnion [opt union]'; + construct CType [c_type_decl] + Type + construct Result [c_variable_decl] + CType [boolTypes] [ptrTypes] Id OptUnion '; + replace [repeat c_lang_stmt] + by + Result +end function + +function alExprExtendToC 
AlExprExtend [repeat al_expr_extend] + deconstruct AlExprExtend + Op [al_expr_op] Term [al_term] Rest [repeat al_expr_extend] + construct RestC [repeat c_expr_extend] + _ [alExprExtendToC Rest] + replace [repeat c_expr_extend] + by + Op Term RestC +end function + +function alExprToC AlExpr [al_expr] + deconstruct AlExpr + ALTerm [al_term] AlExprExtend [repeat al_expr_extend] + construct CExprExtend [repeat c_expr_extend] + _ [alExprExtendToC AlExprExtend] + construct Result [opt c_expr] + ALTerm CExprExtend + replace [opt c_expr] + by + Result [boolVals1] [boolVals2] +end function + +function alStmtToC2 AlStmt [action_lang_stmt] + deconstruct AlStmt + AlExpr [al_expr] '; + construct OptCExpr [opt c_expr] + _ [alExprToC AlExpr] + deconstruct OptCExpr + CExpr [c_expr] + replace [repeat c_lang_stmt] + by + CExpr '; +end function + +function alOptElseC AlOptElse [opt al_else] + deconstruct AlOptElse + 'else + AlSubStmt [action_lang_stmt] + construct AlSubStmts [repeat action_lang_stmt] + AlSubStmt + construct CSubStmts [repeat c_lang_stmt] + _ [alToC AlSubStmts] + deconstruct CSubStmts + CSubStmt [c_lang_stmt] + replace [opt c_else] + by + 'else + CSubStmt +end function + +function alStmtToC3 AlStmt [action_lang_stmt] + deconstruct AlStmt + 'if '( AlExpr [al_expr] ') + AlSubStmt [action_lang_stmt] + AlOptElse [opt al_else] + construct OptCExpr [opt c_expr] + _ [alExprToC AlExpr] + deconstruct OptCExpr + CExpr [c_expr] + construct AlSubStmts [repeat action_lang_stmt] + AlSubStmt + construct CSubStmts [repeat c_lang_stmt] + _ [alToC AlSubStmts] + deconstruct CSubStmts + CSubStmt [c_lang_stmt] + construct OptCElse [opt c_else] + _ [alOptElseC AlOptElse] + replace [repeat c_lang_stmt] + by + 'if '( CExpr ') + CSubStmt + OptCElse +end function + +function alStmtToC4a AlStmt [action_lang_stmt] + deconstruct AlStmt + 'printi Id [id] '; + replace [repeat c_lang_stmt] + by + 'printf '( '"%i" ', Id '); +end function + +function alStmtToC4b AlStmt [action_lang_stmt] + deconstruct 
AlStmt + 'prints String [stringlit] '; + replace [repeat c_lang_stmt] + by + 'fputs '( String , 'stdout '); +end function + +function alStmtToC5 AlStmt [action_lang_stmt] + deconstruct AlStmt + '{ AlSubStmts [repeat action_lang_stmt] '} + construct CSubStmts [repeat c_lang_stmt] + _ [alToC AlSubStmts] + replace [repeat c_lang_stmt] + by + '{ CSubStmts '} +end function + +function alStmtToC6 AlStmt [action_lang_stmt] + deconstruct AlStmt + RagelStmt [al_ragel_stmt] + replace [repeat c_lang_stmt] + by + RagelStmt +end function + +function alToC AlStmts [repeat action_lang_stmt] + deconstruct AlStmts + FirstStmt [action_lang_stmt] Rest [repeat action_lang_stmt] + construct FirstC [repeat c_lang_stmt] + _ + [alStmtToC1 FirstStmt] + [alStmtToC2 FirstStmt] + [alStmtToC3 FirstStmt] + [alStmtToC4a FirstStmt] + [alStmtToC4b FirstStmt] + [alStmtToC5 FirstStmt] + [alStmtToC6 FirstStmt] + construct RestC [repeat c_lang_stmt] + _ [alToC Rest] + replace [repeat c_lang_stmt] + by + FirstC [. RestC] +end function + +rule actionTransC + replace [al_host_block] + '{ AlStmts [repeat action_lang_stmt] '} + construct CStmts [repeat c_lang_stmt] + _ [alToC AlStmts] + by + '{ CStmts '} +end rule + +function langTransC + replace [program] + Definitions [repeat action_lang_stmt] + '%% + Initializations [repeat action_lang_stmt] + RagelDef [ragel_def] + construct CDefinitions [repeat c_lang_stmt] + _ [alToC Definitions] + construct CInitializations [repeat c_lang_stmt] + _ [alToC Initializations] + by + CDefinitions + '%% + CInitializations + RagelDef [actionTransC] +end function + +function main + replace [program] + P [program] + by + P [langTransC] +end function diff --git a/test/langtrans_d.sh b/test/langtrans_d.sh new file mode 100755 index 0000000..117e50a --- /dev/null +++ b/test/langtrans_d.sh @@ -0,0 +1,102 @@ +#!/bin/bash +# + +file=$1 + +[ -f $file ] || exit 1 + +# Get the machine name.
+machine=`sed -n 's/^[\t ]*machine[\t ]*\([a-zA-Z_0-9]*\)[\t ]*;[\t ]*$/\1/p' $file` + +# Make a temporary version of the test case using the D language translations. +sed -n '/\/\*/,/\*\//d;p' $file | txl -q stdin langtrans_d.txl > $file.pr + +# Begin writing out the test case. +cat << EOF +/* + * @LANG: d + * @GENERATED: yes + */ +import std.stdio; +import std.string; + +class $machine +{ +EOF + +# Write the data declarations +sed -n '/^%%$/q;{s/^/\t/;p}' $file.pr + +# Write out the machine specification. +sed -n '/^%%{$/,/^}%%/{s/^/\t/;p}' $file.pr + +# Write out the init and execute routines. +cat << EOF + int cs; + %% write data; + void init() + { +EOF + +sed -n '1,/^%%$/d; /^%%{$/q; {s/^/\t\t/;p}' $file.pr + +cat << EOF + %% write init; + } + + void exec( char *data, int len ) + { + char *p = data; + char *pe = data + len; + %% write exec; + } + + void finish( ) + { + %% write eof; + if ( cs >= ${machine}_first_final ) + writefln( "ACCEPT" ); + else + writefln( "FAIL" ); + } + +EOF + +# Write out the test data. +sed -n '1,/\/\* _____INPUT_____/d; /_____INPUT_____ \*\//q; p;' $file | awk ' +BEGIN { + print " char[][] inp = [" +} +{ + print " " $0 "," +} +END { + print " ];" + print "" + print " int inplen = " NR ";" +}' + +# Write out the main routine. +cat << EOF +} + +int main( ) +{ + $machine m = new $machine(); + int i; + for ( i = 0; i < m.inplen; i++ ) { + m.init(); + m.exec( m.inp[i], m.inp[i].length ); + m.finish(); + } + return 0; +} +/* _____OUTPUT_____ +EOF + +# Write out the expected output. +sed -n '1,/\/\* _____OUTPUT_____/d; /_____OUTPUT_____ \*\//q; p;' $file +echo "*/" + +# Don't need this language-specific file anymore.
+rm $file.pr diff --git a/test/langtrans_d.txl b/test/langtrans_d.txl new file mode 100644 index 0000000..a9151f9 --- /dev/null +++ b/test/langtrans_d.txl @@ -0,0 +1,256 @@ +include "testcase.txl" + +define d_statements + [repeat d_lang_stmt] +end define + +define d_lang_stmt + [al_ragel_stmt] + | [d_variable_decl] + | [d_expr_stmt] + | [d_if_stmt] + | [EX] '{ [IN] [NL] [d_statements] [EX] '} [IN] [NL] +end define + +define d_variable_decl + [d_type_decl] [id] [opt union] '; [NL] +end define + +define d_type_decl + [al_type_decl] + | 'char '* +end define + +define d_expr_stmt + [d_expr] '; [NL] +end define + +define d_expr + [d_term] [repeat d_expr_extend] +end define + +define d_expr_extend + [al_expr_op] [d_term] +end define + +define d_term + [al_term] + | [id] '( [d_args] ') +end define + +define d_args + [list d_expr] +end define + +define d_sign + '- | '+ +end define + +define d_if_stmt + 'if '( [d_expr] ') [NL] [IN] + [d_lang_stmt] [EX] + [opt d_else] +end define + +define d_else + 'else [NL] [IN] + [d_lang_stmt] [EX] +end define + +define d_lang + [d_statements] + '%% [NL] + [d_statements] + [ragel_def] +end define + +define program + [lang_indep] + | [d_lang] +end define + +redefine al_host_block + '{ [NL] [IN] [al_statements] [EX] '} [NL] + | '{ [NL] [IN] [d_statements] [EX] '} [NL] +end define + +rule ptrTypes + replace [d_type_decl] + 'ptr + by + 'char '* +end rule + +function alStmtToD1 AlStmt [action_lang_stmt] + deconstruct AlStmt + VarDecl [al_variable_decl] + deconstruct VarDecl + Type [al_type_decl] Id [id] OptUnion [opt union] '; + construct DType [d_type_decl] + Type + construct Result [d_variable_decl] + DType [ptrTypes] Id OptUnion '; + replace [repeat d_lang_stmt] + by + Result +end function + +function alExprExtendToD AlExprExtend [repeat al_expr_extend] + deconstruct AlExprExtend + Op [al_expr_op] Term [al_term] Rest [repeat al_expr_extend] + construct DRest [repeat d_expr_extend] + _ [alExprExtendToD Rest] + replace [repeat d_expr_extend] 
+ by + Op Term DRest +end function + +function alExprToD AlExpr [al_expr] + deconstruct AlExpr + ALTerm [al_term] AlExprExtend [repeat al_expr_extend] + construct DExprExtend [repeat d_expr_extend] + _ [alExprExtendToD AlExprExtend] + construct Result [opt d_expr] + ALTerm DExprExtend + replace [opt d_expr] + by + Result +end function + +function alStmtToD2 AlStmt [action_lang_stmt] + deconstruct AlStmt + AlExpr [al_expr] '; + construct OptDExpr [opt d_expr] + _ [alExprToD AlExpr] + deconstruct OptDExpr + DExpr [d_expr] + replace [repeat d_lang_stmt] + by + DExpr '; +end function + +function alOptElseD AlOptElse [opt al_else] + deconstruct AlOptElse + 'else + AlSubStmt [action_lang_stmt] + construct AlSubStmts [repeat action_lang_stmt] + AlSubStmt + construct DSubStmts [repeat d_lang_stmt] + _ [alToD AlSubStmts] + deconstruct DSubStmts + DSubStmt [d_lang_stmt] + replace [opt d_else] + by + 'else + DSubStmt +end function + +function alStmtToD3 AlStmt [action_lang_stmt] + deconstruct AlStmt + 'if '( AlExpr [al_expr] ') + AlSubStmt [action_lang_stmt] + AlOptElse [opt al_else] + construct OptDExpr [opt d_expr] + _ [alExprToD AlExpr] + deconstruct OptDExpr + DExpr [d_expr] + construct AlSubStmts [repeat action_lang_stmt] + AlSubStmt + construct DSubStmts [repeat d_lang_stmt] + _ [alToD AlSubStmts] + deconstruct DSubStmts + DSubStmt [d_lang_stmt] + construct OptDElse [opt d_else] + _ [alOptElseD AlOptElse] + replace [repeat d_lang_stmt] + by + 'if '( DExpr ') + DSubStmt + OptDElse +end function + +function alStmtToD4a AlStmt [action_lang_stmt] + deconstruct AlStmt + 'printi Id [id] '; + replace [repeat d_lang_stmt] + by + 'writef '( '"%d" ', Id '); +end function + +function alStmtToD4b AlStmt [action_lang_stmt] + deconstruct AlStmt + 'prints String [stringlit] '; + replace [repeat d_lang_stmt] + by + 'writef '( '"%s" ', String '); +end function + +function alStmtToD5 AlStmt [action_lang_stmt] + deconstruct AlStmt + '{ AlSubStmts [repeat action_lang_stmt] '} + construct 
DSubStmts [repeat d_lang_stmt] + _ [alToD AlSubStmts] + replace [repeat d_lang_stmt] + by + '{ DSubStmts '} +end function + +function alStmtToD6 AlStmt [action_lang_stmt] + deconstruct AlStmt + RagelStmt [al_ragel_stmt] + replace [repeat d_lang_stmt] + by + RagelStmt +end function + +function alToD AlStmts [repeat action_lang_stmt] + deconstruct AlStmts + FirstStmt [action_lang_stmt] Rest [repeat action_lang_stmt] + construct DFirst [repeat d_lang_stmt] + _ + [alStmtToD1 FirstStmt] + [alStmtToD2 FirstStmt] + [alStmtToD3 FirstStmt] + [alStmtToD4a FirstStmt] + [alStmtToD4b FirstStmt] + [alStmtToD5 FirstStmt] + [alStmtToD6 FirstStmt] + construct DRest [repeat d_lang_stmt] + _ [alToD Rest] + replace [repeat d_lang_stmt] + by + DFirst [. DRest] +end function + +rule actionTransD + replace [al_host_block] + '{ AlStmts [repeat action_lang_stmt] '} + construct DStmts [repeat d_lang_stmt] + _ [alToD AlStmts] + by + '{ DStmts '} +end rule + +function langTransD + replace [program] + Definitions [repeat action_lang_stmt] + '%% + Initializations [repeat action_lang_stmt] + RagelDef [ragel_def] + construct DDefinitions [repeat d_lang_stmt] + _ [alToD Definitions] + construct DInitializations [repeat d_lang_stmt] + _ [alToD Initializations] + by + DDefinitions + '%% + DInitializations + RagelDef [actionTransD] +end function + +function main + replace [program] + P [program] + by + P [langTransD] +end function diff --git a/test/langtrans_java.sh b/test/langtrans_java.sh new file mode 100755 index 0000000..65b6184 --- /dev/null +++ b/test/langtrans_java.sh @@ -0,0 +1,100 @@ +#!/bin/bash +# + +file=$1 + +[ -f $file ] || exit 1 +root=${file%.rl} +class=${root}_java + +# Make a temporary version of the test case using the Java language translations. +sed -n '/\/\*/,/\*\//d;p' $file | txl -q stdin langtrans_java.txl - $class > $file.pr + +# Begin writing out the test case.
+cat << EOF +/* + * @LANG: java + * @ALLOW_GENFLAGS: -T0 + * @GENERATED: yes + */ + +class $class +{ +EOF + +# Write the data declarations +sed -n '/^%%$/q;{s/^/\t/;p}' $file.pr + +# Write out the machine specification. +sed -n '/^%%{$/,/^}%%/{s/^/\t/;p}' $file.pr + +# Write out the init and execute routines. +cat << EOF + + int cs; + %% write data; + + void init() + { +EOF + +sed -n '1,/^%%$/d; /^%%{$/q; {s/^/\t\t/;p}' $file.pr + +cat << EOF + %% write init; + } + + void exec( char data[], int len ) + { + int p = 0; + int pe = len; + %% write exec; + } + + void finish( ) + { + %% write eof; + if ( cs >= ${class}_first_final ) + System.out.println( "ACCEPT" ); + else + System.out.println( "FAIL" ); + } + +EOF + +# Write out the test data. +sed -n '1,/\/\* _____INPUT_____/d; /_____INPUT_____ \*\//q; p;' $file | awk ' +BEGIN { + print " static final String inp[] = {" +} +{ + print " " $0 "," +} +END { + print " };" + print "" + print " static final int inplen = " NR ";" +}' + + +# Write out the main routine. +cat << EOF + + public static void main (String[] args) + { + $class machine = new $class(); + for ( int i = 0; i < inplen; i++ ) { + machine.init(); + machine.exec( inp[i].toCharArray(), inp[i].length() ); + machine.finish(); + } + } +} + +EOF + +# Write out the expected output. +sed -n '/\/\* _____OUTPUT_____/,/_____OUTPUT_____ \*\//p;' $file + +# Don't need this language-specific file anymore. 
+rm $file.pr diff --git a/test/langtrans_java.txl b/test/langtrans_java.txl new file mode 100644 index 0000000..3f1755d --- /dev/null +++ b/test/langtrans_java.txl @@ -0,0 +1,303 @@ +include "testcase.txl" + +keys + 'boolean 'new +end keys + + +define java_statements + [repeat java_lang_stmt] +end define + +define java_lang_stmt + [al_ragel_stmt] + | [java_variable_decl] + | [java_expr_stmt] + | [java_if_stmt] + | [EX] '{ [IN] [NL] [java_statements] [EX] '} [IN] [NL] +end define + +define java_variable_decl + [java_type_decl] [id] [opt union] '; [NL] +end define + +define java_type_decl + [al_type_decl] + | 'boolean +end define + +define java_expr_stmt + [java_expr] '; [NL] +end define + +define java_expr + [java_term] [repeat java_expr_extend] +end define + +define java_expr_extend + [al_expr_op] [java_term] +end define + +define java_term + [al_term] + | [id] [repeat java_dot_id] + | [id] [repeat java_dot_id] '( [java_args] ') + | 'new [java_type_decl] [union] +end define + +define java_dot_id + '. 
[id] +end define + +define java_args + [list java_expr] +end define + +define java_sign + '- | '+ +end define + +define java_if_stmt + 'if '( [java_expr] ') [NL] [IN] + [java_lang_stmt] [EX] + [opt java_else] +end define + +define java_else + 'else [NL] [IN] + [java_lang_stmt] [EX] +end define + +define java_lang + [java_statements] + '%% [NL] + [java_statements] + [ragel_def] +end define + +define program + [lang_indep] + | [java_lang] +end define + +redefine al_host_block + '{ [NL] [IN] [al_statements] [EX] '} [NL] + | '{ [NL] [IN] [java_statements] [EX] '} [NL] +end define + +function clearUnion Type [java_type_decl] Id [id] + replace [opt union] + Union [union] + import ArrayInits [java_statements] + Stmts [repeat java_lang_stmt] + export ArrayInits + Id '= 'new Type Union '; Stmts + by + '[] +end function + +rule boolTypes + replace [java_type_decl] + 'bool + by + 'boolean +end rule + +rule ptrTypes + replace [al_type_decl] + 'ptr + by + 'int +end rule + +function alStmtToJava1 AlStmt [action_lang_stmt] + deconstruct AlStmt + VarDecl [al_variable_decl] + deconstruct VarDecl + Type [al_type_decl] Id [id] OptUnion [opt union] '; + construct JavaType [java_type_decl] + Type + construct Result [java_variable_decl] + JavaType [boolTypes] [ptrTypes] Id OptUnion [clearUnion JavaType Id] '; + replace [repeat java_lang_stmt] + by + Result +end function + +function alExprExtendToJava AlExprExtend [repeat al_expr_extend] + deconstruct AlExprExtend + Op [al_expr_op] Term [al_term] Rest [repeat al_expr_extend] + construct JavaRest [repeat java_expr_extend] + _ [alExprExtendToJava Rest] + replace [repeat java_expr_extend] + by + Op Term JavaRest +end function + +function alExprToJava AlExpr [al_expr] + deconstruct AlExpr + ALTerm [al_term] AlExprExtend [repeat al_expr_extend] + construct JavaExprExtend [repeat java_expr_extend] + _ [alExprExtendToJava AlExprExtend] + construct Result [opt java_expr] + ALTerm JavaExprExtend + replace [opt java_expr] + by + Result +end 
function + +function alStmtToJava2 AlStmt [action_lang_stmt] + deconstruct AlStmt + AlExpr [al_expr] '; + construct OptJavaExpr [opt java_expr] + _ [alExprToJava AlExpr] + deconstruct OptJavaExpr + JavaExpr [java_expr] + replace [repeat java_lang_stmt] + by + JavaExpr '; +end function + +function alOptElseJava AlOptElse [opt al_else] + deconstruct AlOptElse + 'else + AlSubStmt [action_lang_stmt] + construct AlSubStmts [repeat action_lang_stmt] + AlSubStmt + construct JavaSubStmts [repeat java_lang_stmt] + _ [alToJava AlSubStmts] + deconstruct JavaSubStmts + JavaSubStmt [java_lang_stmt] + replace [opt java_else] + by + 'else + JavaSubStmt +end function + +function alStmtToJava3 AlStmt [action_lang_stmt] + deconstruct AlStmt + 'if '( AlExpr [al_expr] ') + AlSubStmt [action_lang_stmt] + AlOptElse [opt al_else] + construct OptJavaExpr [opt java_expr] + _ [alExprToJava AlExpr] + deconstruct OptJavaExpr + JavaExpr [java_expr] + construct AlSubStmts [repeat action_lang_stmt] + AlSubStmt + construct JavaSubStmts [repeat java_lang_stmt] + _ [alToJava AlSubStmts] + deconstruct JavaSubStmts + JavaSubStmt [java_lang_stmt] + construct OptJavaElse [opt java_else] + _ [alOptElseJava AlOptElse] + replace [repeat java_lang_stmt] + by + 'if '( JavaExpr ') + JavaSubStmt + OptJavaElse +end function + +function alStmtToJava4a AlStmt [action_lang_stmt] + deconstruct AlStmt + 'printi Id [id] '; + replace [repeat java_lang_stmt] + by + 'System '. 'out '. 'print '( Id '); +end function + +function alStmtToJava4b AlStmt [action_lang_stmt] + deconstruct AlStmt + 'prints String [stringlit] '; + replace [repeat java_lang_stmt] + by + 'System '. 'out '. 
'print '( String '); +end function + +function alStmtToJava5 AlStmt [action_lang_stmt] + deconstruct AlStmt + '{ AlSubStmts [repeat action_lang_stmt] '} + construct JavaSubStmts [repeat java_lang_stmt] + _ [alToJava AlSubStmts] + replace [repeat java_lang_stmt] + by + '{ JavaSubStmts '} +end function + +function alStmtToJava6 AlStmt [action_lang_stmt] + deconstruct AlStmt + RagelStmt [al_ragel_stmt] + replace [repeat java_lang_stmt] + by + RagelStmt +end function + + +function alToJava AlStmts [repeat action_lang_stmt] + deconstruct AlStmts + FirstStmt [action_lang_stmt] Rest [repeat action_lang_stmt] + construct JavaFirst [repeat java_lang_stmt] + _ + [alStmtToJava1 FirstStmt] + [alStmtToJava2 FirstStmt] + [alStmtToJava3 FirstStmt] + [alStmtToJava4a FirstStmt] + [alStmtToJava4b FirstStmt] + [alStmtToJava5 FirstStmt] + [alStmtToJava6 FirstStmt] + construct JavaRest [repeat java_lang_stmt] + _ [alToJava Rest] + replace [repeat java_lang_stmt] + by + JavaFirst [. JavaRest] +end function + +rule actionTransJava + replace [al_host_block] + '{ AlStmts [repeat action_lang_stmt] '} + construct JavaStmts [repeat java_lang_stmt] + _ [alToJava AlStmts] + by + '{ JavaStmts '} +end rule + +rule machineName + replace $ [machine_stmt] + 'machine _ [id] '; + import TXLargs [repeat stringlit] + Arg1 [stringlit] _ [repeat stringlit] + construct ClassName [id] + _ [unquote Arg1] + by + 'machine ClassName '; +end rule + +function langTransJava + replace [program] + Definitions [repeat action_lang_stmt] + '%% + Initializations [repeat action_lang_stmt] + RagelDef [ragel_def] + construct JavaDefinitions [repeat java_lang_stmt] + _ [alToJava Definitions] + construct JavaInitializations [repeat java_lang_stmt] + _ [alToJava Initializations] + construct NewRagelDef [ragel_def] + RagelDef [actionTransJava] [machineName] + import ArrayInits [java_statements] + ArrayInitStmts [repeat java_lang_stmt] + by + JavaDefinitions + '%% + ArrayInitStmts [. 
JavaInitializations] + NewRagelDef +end function + +function main + replace [program] + P [program] + export ArrayInits [java_statements] + _ + by + P [langTransJava] +end function diff --git a/test/lmgoto.rl b/test/lmgoto.rl new file mode 100644 index 0000000..96c4392 --- /dev/null +++ b/test/lmgoto.rl @@ -0,0 +1,198 @@ +/* + * @LANG: c++ + */ + +#include <iostream> +#include <string.h> +using namespace std; + +#define TK_Dlit 192 +#define TK_Slit 193 +#define TK_Float 194 +#define TK_Id 195 +#define TK_NameSep 197 +#define TK_Arrow 211 +#define TK_PlusPlus 212 +#define TK_MinusMinus 213 +#define TK_ArrowStar 214 +#define TK_DotStar 215 +#define TK_ShiftLeft 216 +#define TK_ShiftRight 217 +#define TK_IntegerDecimal 218 +#define TK_IntegerOctal 219 +#define TK_IntegerHex 220 +#define TK_EqualsEquals 223 +#define TK_NotEquals 224 +#define TK_AndAnd 225 +#define TK_OrOr 226 +#define TK_MultAssign 227 +#define TK_DivAssign 228 +#define TK_PercentAssign 229 +#define TK_PlusAssign 230 +#define TK_MinusAssign 231 +#define TK_AmpAssign 232 +#define TK_CaretAssign 233 +#define TK_BarAssign 234 +#define TK_DotDotDot 240 +#define TK_Whitespace 241 +#define TK_Comment 242 + +struct Scanner +{ + int cs, act; + char *tokstart, *tokend; + bool isCxx; + + void token( int tok ); + void run( char *buf ); +}; + + +%%{ + machine Scanner; + + # Process all comments, relies on isCxx being set. + comment := |* + '*/' { + if ( ! isCxx ) + fgoto main; + else { + cout << "comm char: " << tokstart[0] << endl; + cout << "comm char: " << tokstart[1] << endl; + } + }; + + '\n' { + if ( isCxx ) + fgoto main; + else + cout << "comm char: " << tokstart[0] << endl; + }; + + any { + cout << "comm char: " << tokstart[0] << endl; + }; + *|; + + main := |* + + # Single and double literals. + ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) { token( TK_Slit );}; + ( 'L'? 
'"' ( [^"\\\n] | /\\./ )* '"' ) { token( TK_Dlit );}; + + # Identifiers + ( [a-zA-Z_] [a-zA-Z0-9_]* ) { token( TK_Id ); }; + + # Floating literals. + fract_const = digit* '.' digit+ | digit+ '.'; + exponent = [eE] [+\-]? digit+; + float_suffix = [flFL]; + + ( fract_const exponent? float_suffix? | + digit+ exponent float_suffix? ) { token( TK_Float );}; + + # Integer decimal. Leading part buffered by float. + ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) { token( TK_IntegerDecimal );}; + + # Integer octal. Leading part buffered by float. + ( '0' [0-9]+ [ulUL]{0,2} ) { token( TK_IntegerOctal );}; + + # Integer hex. Leading 0 buffered by float. + ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) ) { token( TK_IntegerHex );}; + + # Only buffer the second item, first buffered by symbol. + '::' {token( TK_NameSep );}; + '==' {token( TK_EqualsEquals );}; + '!=' {token( TK_NotEquals );}; + '&&' {token( TK_AndAnd );}; + '||' {token( TK_OrOr );}; + '*=' {token( TK_MultAssign );}; + '/=' {token( TK_DivAssign );}; + '%=' {token( TK_PercentAssign );}; + '+=' {token( TK_PlusAssign );}; + '-=' {token( TK_MinusAssign );}; + '&=' {token( TK_AmpAssign );}; + '^=' {token( TK_CaretAssign );}; + '|=' {token( TK_BarAssign );}; + '++' {token( TK_PlusPlus );}; + '--' {token( TK_MinusMinus );}; + '->' {token( TK_Arrow );}; + '->*' {token( TK_ArrowStar );}; + '.*' {token( TK_DotStar );}; + + # Three char compounds, first item already buffered. + '...' { token( TK_DotDotDot );}; + + # Single char symbols. + ( punct - [_"'] ) { token( tokstart[0] );}; + + # Comments and whitespace. Handle these outside of the machine so that we + # don't end up buffering the comments.
+ '/*' { isCxx = false; fgoto comment; }; + '//' { isCxx = true; fgoto comment; }; + + ( any - 33..126 )+ { token( TK_Whitespace );}; + + *|; +}%% + +%% write data nofinal; + +void Scanner::token( int tok ) +{ + const char *data = tokstart; + int len = tokend - tokstart; + cout << "<" << tok << "> "; + if ( data != 0 ) { + for ( int i = 0; i < len; i++ ) + cout << data[i]; + } + cout << '\n'; +} + +void Scanner::run( char *buf ) +{ + int len = strlen( buf ); + %% write init; + char *p = buf; + char *pe = buf + len; + %% write exec; + + if ( cs == Scanner_error ) { + /* Machine failed before finding a token. */ + cout << "PARSE ERROR" << endl; + } + %% write eof; +} + +int main() +{ + Scanner scanner; + scanner.run( + "//hello*/\n" + "/*hi there*/ hello 0x88\n" + ); + return 0; +} + +#ifdef _____OUTPUT_____ +comm char: h +comm char: e +comm char: l +comm char: l +comm char: o +comm char: * +comm char: / +comm char: h +comm char: i +comm char: +comm char: t +comm char: h +comm char: e +comm char: r +comm char: e +<241> +<195> hello +<241> +<220> 0x88 +#endif diff --git a/test/mailbox1.h b/test/mailbox1.h new file mode 100644 index 0000000..bf9a87e --- /dev/null +++ b/test/mailbox1.h @@ -0,0 +1,33 @@ +#ifndef _MAILBOX1_H +#define _MAILBOX1_H + +#include <stdio.h> +#include <string.h> +#include "vector.h" + +struct MBox +{ + int cs; + + Vector<char> headName; + Vector<char> headContent; + + // Initialize the machine. Invokes any init statement blocks. Returns + // nothing; call finish() to find out whether the machine is in an + // accepting state. + void init( ); + + // Execute the machine on a block of data. Returns nothing; after + // processing the data, call finish() to learn whether the machine is in + // the error state and can never accept, in a non-accepting state, or in + // an accepting state. + void execute( char *data, int len ); + + // Indicate that there is no more data.
Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ); +}; + +#endif diff --git a/test/mailbox1.rl b/test/mailbox1.rl new file mode 100644 index 0000000..89e8775 --- /dev/null +++ b/test/mailbox1.rl @@ -0,0 +1,252 @@ +/* + * @LANG: c++ + * @CFLAGS: -I../aapl + * @ALLOW_GENFLAGS: -T0 -T1 -F0 -F1 -G0 -G1 -G2 -P + */ + +/* + * Parses unix mail boxes into headers and bodies. + */ + +#include "mailbox1.h" + +%%{ + machine MBox; + + # Buffer the header names. + action bufHeadName { fsm->headName.append(fc); } + + # Buffer the header content. + action bufHeadContent { fsm->headContent.append(fc); } + + # Terminate a header. If it is an interesting header then prints it. + action finBufHeadContent { + /* Terminate the buffers. */ + fsm->headName.append(0); + fsm->headContent.append(0); + + /* Print the header. Interesting headers. */ + printf("%s:%s\n", fsm->headName.data, fsm->headContent.data); + + /* Clear for the next time we use them. */ + fsm->headName.empty(); + fsm->headContent.empty(); + } + + action msgstart{ + printf("NEW MESSAGE\n"); + } + + # Prints a blank line after the end of the headers of each message. + action blankLine { + printf("\n"); + } + + # Helpers we will use in matching the date section of the from line. + day = /[A-Z][a-z][a-z]/; + month = /[A-Z][a-z][a-z]/; + year = /[0-9][0-9][0-9][0-9]/; + time = /[0-9][0-9]:[0-9][0-9]/ . ( /:[0-9][0-9]/ | '' ); + letterZone = /[A-Z][A-Z][A-Z]/; + numZone = /[+\-][0-9][0-9][0-9][0-9]/; + zone = letterZone | numZone; + dayNum = /[0-9 ][0-9]/; + + # These are the different formats of the date minus an obscure + # type that has a funny string 'remote from xxx' on the end. Taken + # from c-client in the imap-2000 distribution. + date = day . ' ' . month . ' ' . dayNum . ' ' . time . ' ' . + ( year | year . ' ' . zone | zone . ' ' . 
year ); + + # Note the priority assignment on the end of the from line. While we are + # matching the body of a message we may enter into this machine. We will + # not leave the body of the previous message until this entire from line is + # matched. + fromLine = 'From ' . /[^\n]/* . ' ' . date . '\n' @(new_msg,1) @msgstart; + + # The types of characters that can be used as a header name. + hchar = print - [ :]; + + header = + # The name of the header. + hchar+ $bufHeadName . ':' + # The content of the header. Look out for continuations. + . ( (extend - '\n') $bufHeadContent | '\n'. [ \t] @bufHeadContent )* + # Buffer must end with a newline that does not continue. + . '\n' %finBufHeadContent; + + messageLine = ( extend - '\n' )* . '\n' @(new_msg, 0); + + # When we get to the last newline we are still matching messageLine + # so on the last newline it will think we are still in the message. + # We need this because we can't assume that every newline means + # the end of the current message, whereas at the same time we require + # that there be a newline before the fromLine of the next message. + message = ( fromLine . header* . '\n' @blankLine . messageLine* . '\n' ); + + # It's important that the priority in the fromLine gets bumped up + # so that we are able to move to new messages. Otherwise we + # will always stay in the message body of the first message.
+ main := message*; +}%% + +%% write data; + +void MBox::init( ) +{ + MBox *fsm = this; + %% write init; +} + +void MBox::execute( char *data, int len ) +{ + MBox *fsm = this; + char *p = data; + char *pe = data + len; + %%{ + access fsm->; + write exec; + }%% +} + +int MBox::finish( ) +{ + if ( cs == MBox_error ) + return -1; + if ( cs >= MBox_first_final ) + return 1; + return 0; +} + +MBox mbox; + +void test( char *buf ) +{ + int len = strlen( buf ); + mbox.init(); + mbox.execute( buf, len ); + if ( mbox.finish() > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + + +int main() +{ + test( + "From email address goes here Wed Nov 28 13:30:05 2001 -0500\n" + "Header1: this is the header contents\n" + " there is more on the second line\n" + " and more on the third line.\n" + "Header2: slkdj\n" + "\n" + "This is the message data\n" + "\n" + "From email Wed Nov 28 13:30:05 2001 -0500\n" + "Header: \n" + "\n" + "mail message\n" + "\n" + ); + + test( + "From user@host.dom Wed Nov 28 13:30:05 2001\n" + "\n" + "There are no headers. \n" + "\n" + "From email Wed Nov 28 13:30:05 EST 2000\n" + "\n" + "There are no headers.\n" + "\n" + ); + + test( + "From user@host.dom Wed Nov 28 13:30:05 2001\n" + "Header:alsdj\n" + "\n" + "Header:\n" + "salkfj\n" + "\n" + "There are no headers. \n" + "\n" + ); + + test( + "From user@host.dom Wed Nov 28 13:30:05 2001\n" + "Header:alsdj\n" + "\n" + "Header:\n" + "salkfj\n" + "\n" + "There are no headers. \n" + "\n" + ">From user@host.dom Wed Nov 28 13:30:05 2001\n" + "\n" + ); + + test( + "From user@host.dom Wed Nov 28 13:30:05 2001\n" + "Header:alsdj\n" + "\n" + "Header:\n" + "salkfj\n" + "\n" + "There are no headers. \n" + "\n" + "From user@host.dom Wed Nov 28 13:30:05 2001\n" + "\n" + ); + + test( + "From user@host.dom Wed Nov 28 13:30:05 2001\n" + "Header:alsdj\n" + "\n" + "Header:\n" + "salkfj\n" + "\n" + "There are no headers. 
\n" + "\n" + "From user@host.dom Wed Nov 28 13:30:05 2001\n" + "\n" + "\n" + ); + + return 0; +} + +#ifdef _____OUTPUT_____ +NEW MESSAGE +Header1: this is the header contents there is more on the second line and more on the third line. +Header2: slkdj + +NEW MESSAGE +Header: + +ACCEPT +NEW MESSAGE + +NEW MESSAGE + +ACCEPT +NEW MESSAGE +Header:alsdj + +ACCEPT +NEW MESSAGE +Header:alsdj + +ACCEPT +NEW MESSAGE +Header:alsdj + +NEW MESSAGE + +FAIL +NEW MESSAGE +Header:alsdj + +NEW MESSAGE + +ACCEPT +#endif diff --git a/test/mailbox2.rl b/test/mailbox2.rl new file mode 100644 index 0000000..d84696d --- /dev/null +++ b/test/mailbox2.rl @@ -0,0 +1,173 @@ +/* + * @LANG: c++ + * @CFLAGS: -I../aapl + */ + +#include <iostream> +#include <string.h> + +using std::cin; +using std::cout; +using std::cerr; +using std::endl; + +%%{ + machine mailbox; + + action prn_char { cout << *p; } + action prn_space { cout << ' '; } + action prn_word { cout.write(ws, p-ws); cout << ' '; } + action prn_addr1 { cout << "| "; cout.write(ws+1, p-ws-2); } + action prn_addr2 { cout << "| "; cout.write(ws, p-ws); } + action prn_tab { cout << '\t'; } + action prn_nl { cout << '\n'; } + action prn_separator { cout << "------\n"; } + action prn_from { cout << "FROM\n"; } + action prn_to { cout << "TO\n"; } + action prn_subj { cout << "SUBJECT\n"; } + + action start_word { ws = p; } + action start_headers { preserve = p; } + action end_headers {preserve = 0;} + + day = upper lower{2}; + month = upper lower{2}; + year = digit{4}; + time = digit{2} ':' digit{2} + ( ':' digit{2} )?; + letterZone = upper{3}; + numZone = [+\-] digit{4}; + zone = letterZone | numZone; + dayNum = ( digit | ' ' ) digit; + + date = day ' ' month ' ' + dayNum ' ' time ' ' + ( + year | + year ' ' zone | + zone ' ' year + ); + + fromLine = 'From ' [^\n]* ' ' + date '\n' @start_headers; + + headerChar = print - [ :]; + headersToPrint = 'From' | + 'To' | 'Subject'; + headersToConsume = + headerChar+ - headersToPrint; + + consumeHeader 
= + headersToConsume ':' + ( + [^\n] | + ( '\n' [ \t] ) + )* + '\n'; + + addrWS = ( [ \t]+ | '\n' [ \t]+ ); + addrComment = '(' [^)]* ')'; + addrWord = [^"'@,<>() \t\n]+; + addrAddr1 = '<' [^>]* '>'; + addrAddr2 = addrWord '@' addrWord; + addrString = + '"' [^"]* '"' | + "'" [^']* "'"; + + addrItem = ( + addrAddr1 %prn_addr1 | + addrAddr2 %prn_addr2 | + addrWord %prn_word | + addrString %prn_word + ) >start_word; + + address = ( + addrWS | + addrComment | + addrItem + )** >prn_tab; + + addrHeader = ( + 'From' %prn_from | + 'To' %prn_to + ) ':' + address ( ',' @prn_nl address )* + '\n' %prn_nl; + + subjectHeader = + 'Subject:' @prn_subj @prn_tab + ' '* <: + ( + [^\n] @prn_char | + ( '\n' [ \t]+ ) %prn_space + )** + '\n' %prn_nl; + + header = consumeHeader | + addrHeader | subjectHeader; + + messageLine = + ( [^\n]* '\n' - fromLine ); + + main := ( + fromLine %prn_separator + header* + '\n' @end_headers + messageLine* + )*; + }%% + +%% write data; + +#define BUFSIZE 8192 + +void test( char *buf ) +{ + int cs, len = strlen( buf ); + char *preserve = 0, *ws = 0; + + %% write init; + char *p = buf; + char *pe = p + len; + %% write exec; + + if ( cs == mailbox_error ) + cerr << "ERROR" << endl; + + if ( cs < mailbox_first_final ) + cerr << "DID NOT FINISH IN A FINAL STATE" << endl; +} + +int main() +{ + test( + "From user@host.com Wed Nov 28 13:30:05 2001\n" + "From: \"Adrian D. Thurston\" <thurston@cs.queensu.ca>\n" + "Subject: the squirrel has landed\n" + "\n" + "Message goes here. \n" + "From (trick from line).\n" + "From: not really a header\n" + "\n" + "From user2@host2.com Wed Nov 28 13:30:05 2001\n" + "To: Edgar Allen Poe <ep@net.com> (da man)\n" + "Subject: (no subject) \n" + "\n" + "Message goes here. \n" + "\n" + ); + return 0; +} + +#ifdef _____OUTPUT_____ +------ +FROM + "Adrian D. 
Thurston" | thurston@cs.queensu.ca +SUBJECT + the squirrel has landed +------ +TO + Edgar Allen Poe | ep@net.com +SUBJECT + (no subject) +#endif diff --git a/test/mailbox3.rl b/test/mailbox3.rl new file mode 100644 index 0000000..e8089bb --- /dev/null +++ b/test/mailbox3.rl @@ -0,0 +1,247 @@ +/* + * @LANG: c++ + * @CFLAGS: -I../aapl + */ + +#include <iostream> +#include <string.h> + +using std::cin; +using std::cout; +using std::cerr; +using std::endl; + +%%{ + machine mailbox; + + action prn_char { cout << *p; } + action prn_space { cout << ' '; } + action prn_word { cout.write(ws, p-ws); cout << ' '; } + action prn_addr1 { cout << "| "; cout.write(ws+1, p-ws-2); } + action prn_addr2 { cout << "| "; cout.write(ws, p-ws); } + action prn_tab { cout << '\t'; } + action prn_nl { cout << '\n'; } + action prn_separator { cout << "------\n"; } + action prn_from { cout << "FROM\n"; } + action prn_to { cout << "TO\n"; } + action prn_subj { cout << "SUBJECT\n"; } + + action start_word { ws = p; } + action start_headers { preserve = p; } + action end_headers {preserve = 0;} + + day = upper lower{2}; + month = upper lower{2}; + year = digit{4}; + time = digit{2} ':' digit{2} + ( ':' digit{2} )?; + letterZone = upper{3}; + numZone = [+\-] digit{4}; + zone = letterZone | numZone; + dayNum = ( digit | ' ' ) digit; + + date = day ' ' month ' ' + dayNum ' ' time ' ' + ( + year | + year ' ' zone | + zone ' ' year + ); + + fromLine = 'From ' [^\n]* ' ' + date '\n' @start_headers; + + headerChar = print - [ :]; + headersToPrint = 'From' | + 'To' | 'Subject'; + headersToConsume = + headerChar+ - headersToPrint; + + action init_hlen {hlen = 0;} + action hlen {hlen++ < 50} + + consumeHeaderBody = + ':' @init_hlen + ( + [^\n] | + ( '\n' [ \t] ) + )* when hlen + '\n'; + + consumeHeader = + headersToConsume consumeHeaderBody; + + addrWS = ( [ \t]+ | '\n' [ \t]+ ); + addrComment = '(' [^)]* ')'; + addrWord = [^"'@,<>() \t\n]+; + addrAddr1 = '<' [^>]* '>'; + addrAddr2 = addrWord '@' 
addrWord; + addrString = + '"' [^"]* '"' | + "'" [^']* "'"; + + addrItem = ( + addrAddr1 %prn_addr1 | + addrAddr2 %prn_addr2 | + addrWord %prn_word | + addrString %prn_word + ) >start_word; + + address = ( + addrWS | + addrComment | + addrItem + )** >prn_tab; + + addrHeader = ( + 'From' %prn_from | + 'To' %prn_to + ) ':' @init_hlen + ( address ( ',' @prn_nl address )* ) when hlen + '\n' %prn_nl; + + subjectHeader = + 'Subject:' @prn_subj @prn_tab @init_hlen + ( + ' '* <: + ( + [^\n] @prn_char | + ( '\n' [ \t]+ ) %prn_space + )** + ) when hlen + '\n' %prn_nl; + + header = consumeHeader | + addrHeader | subjectHeader; + + messageLine = + ( [^\n]* when hlen '\n' @init_hlen ) - fromLine; + + main := ( + fromLine %prn_separator + header* + '\n' @end_headers @init_hlen + messageLine* + )*; + }%% + +%% write data; + +#define BUFSIZE 8192 + +void test( char *buf ) +{ + int cs, len = strlen( buf ); + char *preserve = 0, *ws = 0; + int hlen = 0; + + %% write init; + char *p = buf; + char *pe = p + len; + %% write exec; + + if ( cs < mailbox_first_final ) { + cout << endl << endl; + cout << "DID NOT FINISH IN A FINAL STATE" << endl; + } +} + +int main() +{ + test( + "From user@host.com Wed Nov 28 13:30:05 2001\n" + "From: \"Adrian D. Thurston\" <thurston@cs.queensu.ca>\n" + "Subject: the squirrel has landed\n" + "\n" + "Message goes here. \n" + "From (trick from line).\n" + "From: not really a header\n" + "\n" + "From user2@host2.com Wed Nov 28 13:30:05 2001\n" + "To: \"(kill 1)\" Edgar Allen Poe <ep@net.com> (da man)\n" + "Subject: (no subject) this is a really long subject which should fail the length constraint \n" + "Other: 0123456789\n" + "\n" + "Message goes here. \n" + "\n" + ); + test( + "From user@host.com Wed Nov 28 13:30:05 2001\n" + "To: \"(kill 2)\" some guy <sg@net.com>\n" + "From: \"Adrian D. 
Thurston this name is far too long\" <thurston@cs.queensu.ca>\n" + "Subject: the squirrel has landed\n" + "\n" + "From user2@host2.com Wed Nov 28 13:30:05 2001\n" + "To: Edgar Allen Poe <ep@net.com> (da man)\n" + "Subject: (no subject) \n" + "\n" + ); + test( + "From user@host.com Wed Nov 28 13:30:05 2001\n" + "To: \"(kill 3)\" some guy <sg@net.com>\n" + "From: \"Adrian D. Thurston This name is fore sure absolutely too long\" <t@cs.ca>\n" + "Subject: the squirrel has landed\n" + "\n" + ); + test( + "From user@host.com Wed Nov 28 13:30:05 2001\n" + "From: \"Adrian D. Thurston \" <t@cs.ca>\n" + "Subject: (kill 4) the squirrel has landed\n" + "Other: This is another header field, not interpreted, that is too long\n" + "\n" + ); + test( + "From user@host.com Wed Nov 28 13:30:05 2001\n" + "From: \"Adrian D. Thurston \" <t@cs.ca>\n" + "Subject: (kill 5)the squirrel has landed\n" + "\n" + "This message line is okay.\n" + "But this message line is far too long and will cause an error.\n" + ); + return 0; +} + +#ifdef _____OUTPUT_____ +------ +FROM + "Adrian D. Thurston" | thurston@cs.queensu.ca +SUBJECT + the squirrel has landed +------ +TO + "(kill 1)" Edgar Allen Poe | ep@net.com +SUBJECT + (no subject) this is a really long subject whic + +DID NOT FINISH IN A FINAL STATE +------ +TO + "(kill 2)" some guy | sg@net.com +FROM + "Adrian D. Thurston this name is far too long" + +DID NOT FINISH IN A FINAL STATE +------ +TO + "(kill 3)" some guy | sg@net.com +FROM + + +DID NOT FINISH IN A FINAL STATE +------ +FROM + "Adrian D. Thurston " | t@cs.ca +SUBJECT + (kill 4) the squirrel has landed + + +DID NOT FINISH IN A FINAL STATE +------ +FROM + "Adrian D. 
Thurston " | t@cs.ca +SUBJECT + (kill 5)the squirrel has landed + + +DID NOT FINISH IN A FINAL STATE +#endif diff --git a/test/minimize1.rl b/test/minimize1.rl new file mode 100644 index 0000000..d7c6ef4 --- /dev/null +++ b/test/minimize1.rl @@ -0,0 +1,83 @@ +/* + * @LANG: c + */ + +#include <stdio.h> +#include <string.h> + +struct min +{ + int cs; +}; + +%%{ + machine min; + variable curstate fsm->cs; + + action a_or_b { printf("a or b\n"); } + + main := ( + ( 'a' . [ab]* @a_or_b ) | + ( 'b' . [ab]* @a_or_b ) + ) . '\n'; +}%% + +%% write data; + +void min_init( struct min *fsm ) +{ + %% write init; +} + +void min_execute( struct min *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + + %% write exec; +} + +int min_finish( struct min *fsm ) +{ + %% write eof; + + if ( fsm->cs == min_error ) + return -1; + if ( fsm->cs >= min_first_final ) + return 1; + return 0; +} + +struct min fsm; + +void test( char *buf ) +{ + int len = strlen( buf ); + min_init( &fsm ); + min_execute( &fsm, buf, len ); + if ( min_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + + +int main() +{ + test( "aaaaaa\n" ); + test( "a\n" ); + test( "abc\n" ); + return 0; +} + +#ifdef _____OUTPUT_____ +a or b +a or b +a or b +a or b +a or b +ACCEPT +ACCEPT +a or b +FAIL +#endif diff --git a/test/patact.rl b/test/patact.rl new file mode 100644 index 0000000..c15d93d --- /dev/null +++ b/test/patact.rl @@ -0,0 +1,91 @@ +/* + * @LANG: indep + */ + +char comm; +int top; +int stack[32]; +ptr tokstart; +ptr tokend; +int act; +int val; +%% +%%{ + machine patact; + + other := |* + [a-z]+ => { prints "word\n"; }; + [0-9]+ => { prints "num\n"; }; + [\n ] => { prints "space\n"; }; + *|; + + exec_test := |* + [a-z]+ => { prints "word (w/lbh)\n"; fexec tokend-1; fgoto other; }; + [a-z]+ ' foil' => { prints "word (c/lbh)\n"; }; + [\n ] => { prints "space\n"; }; + '22' => { prints "num (w/switch)\n"; }; + [0-9]+ => { prints "num (w/switch)\n"; 
fexec tokend-1; fgoto other;}; + [0-9]+ ' foil' => {prints "num (c/switch)\n"; }; + '!';# => { prints "immdiate\n"; fgoto exec_test; }; + *|; + + main := |* + [a-z]+ => { prints "word (w/lbh)\n"; fhold; fgoto other; }; + [a-z]+ ' foil' => { prints "word (c/lbh)\n"; }; + [\n ] => { prints "space\n"; }; + '22' => { prints "num (w/switch)\n"; }; + [0-9]+ => { prints "num (w/switch)\n"; fhold; fgoto other;}; + [0-9]+ ' foil' => {prints "num (c/switch)\n"; }; + '!' => { prints "immdiate\n"; fgoto exec_test; }; + *|; +}%% +/* _____INPUT_____ +"abcd foix\n" +"abcd\nanother\n" +"123 foix\n" +"!abcd foix\n" +"!abcd\nanother\n" +"!123 foix\n" +_____INPUT_____ */ +/* _____OUTPUT_____ +word (w/lbh) +word +space +word +space +ACCEPT +word (w/lbh) +word +space +word +space +ACCEPT +num (w/switch) +num +space +word +space +ACCEPT +immdiate +word (w/lbh) +word +space +word +space +ACCEPT +immdiate +word (w/lbh) +word +space +word +space +ACCEPT +immdiate +num (w/switch) +num +space +word +space +ACCEPT +_____OUTPUT_____ */ + diff --git a/test/range.rl b/test/range.rl new file mode 100644 index 0000000..34bc430 --- /dev/null +++ b/test/range.rl @@ -0,0 +1,76 @@ +/* + * @LANG: c + */ + +#include <stdio.h> +#include <string.h> + +struct range +{ + int cs; +}; + +%%{ + machine range_fsm; + variable curstate fsm->cs; + + main := ( 'a' .. 'c' | 'c' .. 'e' | 'm' .. 'n' | 'a' .. 
'z' ) '\n'; +}%% + +%% write data; + +void range_init( struct range *fsm ) +{ + %% write init; +} + +void range_execute( struct range *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + + %% write exec; +} + +int range_finish( struct range *fsm ) +{ + %% write eof; + + if ( fsm->cs == range_fsm_error ) + return -1; + if ( fsm->cs >= range_fsm_first_final ) + return 1; + return 0; +} + +struct range fsm; + +void test( char *buf ) +{ + int len = strlen( buf ); + range_init( &fsm ); + range_execute( &fsm, buf, len ); + if ( range_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +int main() +{ + test( "a\n" ); + test( "z\n" ); + test( "g\n" ); + test( "no\n" ); + test( "1\n" ); + + return 0; +} + +#ifdef _____OUTPUT_____ +ACCEPT +ACCEPT +ACCEPT +FAIL +FAIL +#endif diff --git a/test/repetition.rl b/test/repetition.rl new file mode 100644 index 0000000..23638b3 --- /dev/null +++ b/test/repetition.rl @@ -0,0 +1,293 @@ +/* + * @LANG: c++ + */ + +/* Test repetition operators. 
*/ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> + +using namespace std; + +struct Rep +{ + int cs; + + int init( ); + int execute( const char *data, int len ); + int finish( ); +}; + +%%{ + machine Rep; + + action begin { cout << "begin" << endl; } + action in { cout << "in" << endl; } + action end { cout << "end" << endl; } + + a = 'a' >begin @in %end; + b = 'b' >begin @in %end; + c = 'c' >begin @in %end; + d = 'd' >begin @in %end; + + main := + ( a {5} '\n' )* '-\n' + ( b {,5} '\n' )* '-\n' + ( c {5,} '\n' )* '-\n' + ( d {2,5} '\n' )*; +}%% + +%% write data; + +int Rep::init( ) +{ + %% write init; + return 1; +} + +int Rep::execute( const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + + %% write exec; + + if ( cs == Rep_error ) + return -1; + if ( cs >= Rep_first_final ) + return 1; + return 0; +} + +int Rep::finish( ) +{ + %% write eof; + if ( cs == Rep_error ) + return -1; + if ( cs >= Rep_first_final ) + return 1; + return 0; +} + +void test( char *buf ) +{ + Rep rep; + int len = strlen( buf ); + rep.init(); + rep.execute( buf, len ); + if ( rep.finish() > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +int main() +{ + test( + "aaaaa\n" + "-\n" + "\n" + "b\n" + "bb\n" + "bbb\n" + "bbbb\n" + "bbbbb\n" + "-\n" + "ccccc\n" + "ccccccc\n" + "cccccccccc\n" + "-\n" + "dd\n" + "ddd\n" + "dddd\n" + "ddddd\n" + ); + + test( + "a\n" + "-\n" + "b\n" + "-\n" + "c\n" + "-\n" + "d\n" + ); + + return 0; +} + +#ifdef _____OUTPUT_____ +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end 
+begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +ACCEPT +begin +in +FAIL +#endif diff --git a/test/rlscan.rl b/test/rlscan.rl new file mode 100644 index 0000000..943c4f5 --- /dev/null +++ b/test/rlscan.rl @@ -0,0 +1,287 @@ +/* + * Lexes Ragel input files. + * + * @LANG: c++ + * @ALLOW_GENFLAGS: -T0 -T1 -F0 -F1 -G0 -G1 -G2 -P + */ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +using namespace std; + +void escapeXML( char *data ) +{ + while ( *data != 0 ) { + switch ( *data ) { + case '<': cout << "<"; break; + case '>': cout << ">"; break; + case '&': cout << "&"; break; + default: cout << *data; break; + } + data += 1; + } +} + +void escapeXML( char c ) +{ + switch ( c ) { + case '<': cout << "<"; break; + case '>': cout << ">"; break; + case '&': cout << "&"; break; + default: cout << c; break; + } +} + +void escapeXML( char *data, int len ) +{ + for ( char *end = data + len; data != end; data++ ) { + switch ( *data ) { + case '<': cout << "<"; break; + case '>': cout << ">"; break; + case '&': cout << "&"; break; + default: cout << *data; break; + } + } +} + +inline void write( char *data ) +{ + cout << data; +} + +inline void write( char c ) +{ + cout << c; +} + +inline void write( char *data, int len ) +{ + cout.write( data, len ); +} + + +%%{ + machine RagelScan; + + word = [a-zA-Z_][a-zA-Z_0-9]*; + integer = [0-9]+; + hex = '0x' [0-9a-fA-F] [0-9a-fA-F]*; + + default = ^0; + EOF = 0; + + # Handles comments in outside code and inline blocks. 
+ c_comment := + ( default* :>> '*/' ) + ${ escapeXML( fc ); } + @{ fret; }; + + action emit { + escapeXML( tokstart, tokend-tokstart ); + } + + # + # Inline action code + # + + ilscan := |* + + "'" ( [^'\\] | /\\./ )* "'" => emit; + '"' ( [^"\\] | /\\./ )* '"' => emit; + '/*' { + write( "/*" ); + fcall c_comment; + }; + '//' [^\n]* '\n' => emit; + + '{' { + write( '{' ); + inline_depth += 1; + }; + + '}' { + write( '}' ); + /* If dropping down to the last } then return + * to ragel code. */ + if ( --inline_depth == 0 ) { + write( "</inline>\n" ); + fgoto rlscan; + } + }; + + default => { escapeXML( *tokstart ); }; + *|; + + # + # Ragel Tokens + # + + rlscan := |* + '}%%' { + if ( !single_line ) { + write( "</section>\n" ); + fgoto main; + } + }; + + '\n' { + if ( single_line ) { + write( "</section>\n" ); + fgoto main; + } + }; + + # Word + word { + write( "<word>" ); + write( tokstart, tokend-tokstart ); + write( "</word>\n" ); + }; + + # Decimal integer. + integer { + write( "<int>" ); + write( tokstart, tokend-tokstart ); + write( "</int>\n" ); + }; + + # Hexidecimal integer. + hex { + write( "<hex>" ); + write( tokstart, tokend-tokstart ); + write( "</hex>\n" ); + }; + + # Consume comments. + '#' [^\n]* '\n'; + + # Single literal string. + "'" ( [^'\\] | /\\./ )* "'" { + write( "<single_lit>" ); + escapeXML( tokstart, tokend-tokstart ); + write( "</single_lit>\n" ); + }; + + # Double literal string. + '"' ( [^"\\] | /\\./ )* '"' { + write( "<double_lit>" ); + escapeXML( tokstart, tokend-tokstart ); + write( "</double_lit>\n" ); + }; + + # Or literal. + '[' ( [^\]\\] | /\\./ )* ']' { + write( "<or_lit>" ); + escapeXML( tokstart, tokend-tokstart ); + write( "</or_lit>\n" ); + }; + + # Regex Literal. 
+ '/' ( [^/\\] | /\\./ ) * '/' { + write( "<re_lit>" ); + escapeXML( tokstart, tokend-tokstart ); + write( "</re_lit>\n" ); + }; + + # Open an inline block + '{' { + inline_depth = 1; + write( "<inline>{" ); + fgoto ilscan; + }; + + punct { + write( "<symbol>" ); + escapeXML( fc ); + write( "</symbol>\n" ); + }; + + default; + *|; + + # + # Outside code. + # + + main := |* + + "'" ( [^'\\] | /\\./ )* "'" => emit; + '"' ( [^"\\] | /\\./ )* '"' => emit; + + '/*' { + escapeXML( tokstart, tokend-tokstart ); + fcall c_comment; + }; + + '//' [^\n]* '\n' => emit; + + '%%{' { + write( "<section>\n" ); + single_line = false; + fgoto rlscan; + }; + + '%%' { + write( "<section>\n" ); + single_line = true; + fgoto rlscan; + }; + + default { + escapeXML( *tokstart ); + }; + + # EOF. + EOF; + *|; +}%% + +%% write data nofinal; + +void test( char *data ) +{ + std::ios::sync_with_stdio(false); + + int cs, act; + char *tokstart, *tokend; + int stack[1], top; + + bool single_line = false; + int inline_depth = 0; + + %% write init; + + /* Read in a block. */ + char *p = data; + char *pe = data + strlen( data ); + %% write exec; + + if ( cs == RagelScan_error ) { + /* Machine failed before finding a token. */ + cerr << "PARSE ERROR" << endl; + exit(1); + } +} + +#define BUFSIZE 2048 + +int main() +{ + std::ios::sync_with_stdio(false); + + test("hi %%{ /'}%%'/ { /*{*/ {} } + '\\'' }%%there\n"); + + return 0; +} +#ifdef _____OUTPUT_____ +hi <section> +<re_lit>/'}%%'/</re_lit> +<inline>{ /*{*/ {} }</inline> +<symbol>+</symbol> +<single_lit>'\''</single_lit> +</section> +there +#endif diff --git a/test/runtests b/test/runtests new file mode 100755 index 0000000..32b5fb0 --- /dev/null +++ b/test/runtests @@ -0,0 +1,251 @@ +#!/bin/bash + +# +# Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca> +# + +# This file is part of Ragel. 
+# +# Ragel is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Ragel is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ragel; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +while getopts "gcnmleT:F:G:P:CDJ" opt; do + case $opt in + T|F|G|P) + genflags="$genflags -$opt$OPTARG" + options="$options -$opt$OPTARG" + ;; + n|m|l|e) + minflags="$minflags -$opt" + options="$options -$opt" + ;; + c) + compile_only="true" + options="$options -$opt" + ;; + g) + allow_generated="true" + ;; + C|D|J) + langflags="$langflags -$opt" + ;; + esac +done + +[ -z "$minflags" ] && minflags="-n -m -l -e" +[ -z "$genflags" ] && genflags="-T0 -T1 -F0 -F1 -G0 -G1 -G2" +[ -z "$langflags" ] && langflags="-C -D -J" + +shift $((OPTIND - 1)); + +[ -z "$*" ] && set -- *.rl + +# find the config file +config=../common/config.h +ragel=../ragel/ragel +rlcodegen=../rlcodegen/rlcodegen +if ! [ -d ../common ]; then + config=../$config + ragel=../$ragel + rlcodegen=../$rlcodegen +fi + +cxx_compiler=`sed '/^#define CXX/s/#define CXX *//p;d' $config` +c_compiler=`sed '/^#define CC/s/#define CC *//p;d' $config` +objc_compiler=`sed '/^#define GOBJC/s/#define GOBJC *//p;d' $config` +d_compiler=`sed '/^#define GDC/s/#define GDC *//p;d' $config` +java_compiler=`sed '/#define JAVAC/s/#define JAVAC *//p;d' $config` +txl_engine=`sed '/^#define TXL/s/#define TXL *//p;d' $config` + +function test_error +{ + exit 1; +} + +for test_case; do + root=${test_case%.rl}; + + if ! 
[ -f "$test_case" ]; then + echo "runtests: not a file: $test_case" >&2 # diagnostics belong on stderr + exit 1; + fi + + # Check if we should ignore the test case + ignore=`sed '/@IGNORE:/s/^.*: *//p;d' $test_case` + if [ "$ignore" = yes ]; then + continue; + fi + + # If the generated flag is given make sure that the test case is generated. + is_generated=`sed '/@GENERATED:/s/^.*: *//p;d' $test_case` + if [ "$is_generated" = yes ] && [ "$allow_generated" != true ]; then + continue; + fi + + expected_out=$root.exp; + sed '1,/_____OUTPUT_____/d;$d' $test_case > $expected_out + + lang=`sed '/@LANG:/s/^.*: *//p;d' $test_case` + if [ -z "$lang" ]; then + echo "$test_case: language unset" >&2 # diagnostics belong on stderr + exit 1; + fi + + case $lang in + c++) + code_suffix=cpp; + compiler=$cxx_compiler; + lang_opt=-C; + cflags="-pedantic -ansi -Wall -O3" + ;; + d) + code_suffix=d; + compiler=$d_compiler; + lang_opt=-D; + cflags="-Wall -O3" + ;; + c) + code_suffix=c; + compiler=$c_compiler; + lang_opt=-C; + cflags="-pedantic -ansi -Wall -O3" + ;; + obj-c) + code_suffix=m; + compiler=$objc_compiler + lang_opt=-C; + cflags="-Wall -O3 -fno-strict-aliasing -lobjc" + ;; + java) + code_suffix=java; + compiler=$java_compiler + lang_opt=-J; + cflags="" + ;; + indep) + # If we have no compiler for the source program then skip it. + [ -z "$txl_engine" ] && continue + for lang in c d java; do + case $lang in + c) lf="-C";; + d) lf="-D";; + java) lf="-J";; + esac + + echo "$langflags" | grep -e $lf >/dev/null || continue + + targ=${root}_$lang.rl + echo "./langtrans_$lang.sh $test_case > $targ" + if ! ./langtrans_$lang.sh $test_case > $targ; then + test_error + fi + echo "./runtests -g $options $targ" + if ! ./runtests -g $options $targ; then + test_error + fi + done + continue; + ;; + *) + echo "$test_case: unknown language type $lang" >&2 + exit 1; + ;; + esac + + # Make sure that we are interested in the host language. 
+ echo "$langflags" | grep -e $lang_opt >/dev/null || continue + + code_src=$root.$code_suffix; + binary=$root.bin; + output=$root.out; + + # If we have no compiler for the source program then skip it. + [ -z "$compiler" ] && continue + + additional_cflags=`sed '/@CFLAGS:/s/^.*: *//p;d' $test_case` + [ -n "$additional_cflags" ] && cflags="$cflags $additional_cflags" + + allow_minflags=`sed '/@ALLOW_MINFLAGS:/s/^.*: *//p;d' $test_case` + [ -z "$allow_minflags" ] && allow_minflags="-n -m -l -e" + + allow_genflags=`sed '/@ALLOW_GENFLAGS:/s/^.*: *//p;d' $test_case` + [ -z "$allow_genflags" ] && allow_genflags="-T0 -T1 -F0 -F1 -G0 -G1 -G2" + + for min_opt in $minflags; do + for gen_opt in $genflags; do + echo "$allow_minflags" | grep -e $min_opt >/dev/null || continue + + grep_gen_opt=${gen_opt} + split_iters=${gen_opt#-P} + if test $split_iters != $gen_opt; then + grep_gen_opt="-P"; + fi + echo "$allow_genflags" | grep -e $grep_gen_opt >/dev/null || continue + + echo "$ragel $min_opt $lang_opt $test_case | $rlcodegen $gen_opt -o $code_src" + if ! $ragel $min_opt $lang_opt $test_case | $rlcodegen $gen_opt -o $code_src; then + test_error; + fi + + split_objs="" + if test $split_iters != $gen_opt; then + n=0; + while test $n -lt $split_iters; do + part_root=${root}_`awk 'BEGIN { + width = 0; + high = '$split_iters' - 1; + while ( high > 0 ) { + width = width + 1; + high = int(high / 10); + } + suffFormat = "%" width "." width "d\n"; + printf( suffFormat, '$n' ); + exit 0; + }'` + part_src=${part_root}.c + part_bin=${part_root}.o + echo "$compiler -c $cflags -o $part_bin $part_src" + if ! $compiler -c $cflags -o $part_bin $part_src; then + test_error; + fi + split_objs="$split_objs $part_bin" + n=$((n+1)) + done + fi + + out_args="" + [ $lang != java ] && out_args="-o ${binary}"; + + echo "$compiler ${cflags} ${out_args} ${code_src}" + if ! 
$compiler ${cflags} ${out_args} ${code_src}; then + test_error; + fi + + if [ "$compile_only" != "true" ]; then + echo -n "running $root ... "; + + exec_cmd=./$binary + [ $lang = java ] && exec_cmd="java $root" + + $exec_cmd 2>&1 > $output; + if diff $expected_out $output > /dev/null; then + echo "passed"; + else + echo "FAILED"; + test_error; + fi; + fi + done + done +done diff --git a/test/stateact1.rl b/test/stateact1.rl new file mode 100644 index 0000000..ef50c75 --- /dev/null +++ b/test/stateact1.rl @@ -0,0 +1,48 @@ +/* + * @LANG: indep + * + * Test in and out state actions. + */ +%% +%%{ + machine state_act; + + action a1 { prints "a1\n"; } + action a2 { prints "a2\n"; } + action b1 { prints "b1\n"; } + action b2 { prints "b2\n"; } + action c1 { prints "c1\n"; } + action c2 { prints "c2\n"; } + action next_again {fnext again;} + + hi = 'hi'; + line = again: + hi + >to b1 + >from b2 + '\n' + >to c1 + >from c2 + @next_again; + + main := line* + >to a1 + >from a2; +}%% + +/* _____INPUT_____ +"hi\nhi\n" +_____INPUT_____ */ + +/* _____OUTPUT_____ +a2 +b2 +c1 +c2 +b1 +b2 +c1 +c2 +b1 +FAIL +_____OUTPUT_____ */ diff --git a/test/statechart1.rl b/test/statechart1.rl new file mode 100644 index 0000000..9f1ce49 --- /dev/null +++ b/test/statechart1.rl @@ -0,0 +1,102 @@ +/* + * @LANG: c + */ + +/* + * Test in and out state actions. 
+ */ + +#include <stdio.h> +#include <string.h> + +struct state_chart +{ + int cs; +}; + +%%{ + machine state_chart; + variable curstate fsm->cs; + + action a { printf("a"); } + action b { printf("b"); } + action hexa { printf("a"); } + action hexb { printf("b"); } + + hex_a = '0x' '0'* '61' @hexa; + hex_b = '0x' '0'* '62' @hexb; + + a = 'a' @a | hex_a; + b = 'b' @b | hex_b; + ws = ' '+; + + mach = + start: ( + a -> st1 | + b -> st2 | + zlen -> final + ), + st1: ( + a -> st1 | + ws -> start | + zlen -> final + ), + st2: ( + b -> st2 | + ws -> start | + zlen -> final + ); + + main := ( mach '\n' )*; +}%% + +%% write data; + +void state_chart_init( struct state_chart *fsm ) +{ + %% write init; +} + +void state_chart_execute( struct state_chart *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + + %% write exec; +} + +int state_chart_finish( struct state_chart *fsm ) +{ + %% write eof; + + if ( fsm->cs == state_chart_error ) + return -1; + if ( fsm->cs >= state_chart_first_final ) + return 1; + return 0; +} + +struct state_chart sc; + +void test( char *buf ) +{ + int len = strlen( buf ); + state_chart_init( &sc ); + state_chart_execute( &sc, buf, len ); + state_chart_finish( &sc ); + printf("\n"); +} + +int main() +{ + test( + "aa0x0061aa b\n" + "bbb0x62b 0x61 0x000062\n" + ); + + return 0; +} + +#ifdef _____OUTPUT_____ +aaaaabbbbbbab +#endif diff --git a/test/strings1.rl b/test/strings1.rl new file mode 100644 index 0000000..d156da1 --- /dev/null +++ b/test/strings1.rl @@ -0,0 +1,195 @@ +/* + * @LANG: c + */ + +#include <string.h> +#include <stdio.h> + +struct strs +{ + int cs; +}; + +%%{ + machine strs; + variable curstate fsm->cs; + + main := + "__gmon_start__\n" | + "cerr\n" | + "__cp_push_exception\n" | + "_DYNAMIC\n" | + "__rtti_user\n" | + "__rtti_si\n" | + "_init\n" | + "__throw\n" | + "__deregister_frame_info\n" | + "terminate__Fv\n" | + "__builtin_vec_new\n" | + "_fini\n" | + "__builtin_vec_delete\n" | + 
"_GLOBAL_OFFSET_TABLE_\n" | + "__nw__FUiPv\n" | + "__builtin_delete\n" | + "__builtin_new\n" | + "cout\n" | + "__register_frame_info\n" | + "__eh_alloc\n" | + "strcpy\n" | + "stdout\n" | + "memmove\n" | + "memcpy\n" | + "malloc\n" | + "isatty\n" | + "strtoul\n" | + "fprintf\n" | + "stdin\n" | + "ferror\n" | + "strncpy\n" | + "unlink\n" | + "strcasecmp\n" | + "realloc\n" | + "_IO_getc\n" | + "fread\n" | + "memset\n" | + "__assert_fail\n" | + "strcmp\n" | + "stderr\n" | + "fwrite\n" | + "exit\n" | + "fopen\n" | + "atoi\n" | + "fileno\n" | + "_IO_stdin_used\n" | + "__libc_start_main\n" | + "strlen\n" | + "free\n" | + "_edata\n" | + "__bss_start\n" | + "_end\n" | + "QVhl\n" | + "BPPh\n" | + "PHRV\n" | + "PHRj\n" | + "PHRj\n" | + "jphy\n" | + "jqhy\n" | + "PHRj\n" | + "PHRj\n" | + "LWVS\n" | + "LWVS\n" | + "bad_alloc\n" | + "main\n" | + "false\n" | + "help\n" | + "bad_alloc\n" | + "bad_alloc\n" | + "bad_alloc\n" | + "ascii\n" | + "extend\n" | + "alnum\n" | + "alpha\n" | + "cntrl\n" | + "digit\n" | + "graph\n" | + "lower\n" | + "print\n" | + "punct\n" | + "space\n" | + "upper\n" | + "xdigit\n" | + "false\n" | + "bad_alloc\n" | + "bad_alloc\n" | + "bad_alloc\n" | + "TransStruct\n" | + "StateStruct\n" | + "Struct\n" | + "Init\n" | + "bad_alloc\n" | + "TransStruct\n" | + "StateStruct\n" | + "Struct\n" | + "Init\n" | + "Accept\n" | + "Finish\n" | + "bad_alloc\n" | + "Struct\n" | + "Init\n" | + "Finish\n" | + "Accept\n" | + "bad_alloc\n" | + "Struct\n" | + "Init\n" | + "bad_alloc\n" | + "Struct\n" | + "Init\n" | + "Finish\n" | + "Accept\n" | + "bad_alloc\n" | + "Struct\n" | + "Init\n" | + "Finish\n" | + "Accept"; +}%% + +%% write data; + +void strs_init( struct strs *fsm ) +{ + %% write init; +} + +void strs_execute( struct strs *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + + %% write exec; +} + +int strs_finish( struct strs *fsm ) +{ + %% write eof; + + if ( fsm->cs == strs_error ) + return -1; + if ( fsm->cs >= 
strs_first_final ) + return 1; + return 0; +} + +struct strs fsm; +void test( char *buf ) +{ + int len = strlen( buf ); + strs_init( &fsm ); + strs_execute( &fsm, buf, len ); + if ( strs_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + + +int main() +{ + test( "stdin\n" ); + test( "bad_alloc\n" ); + test( "_GLOBAL_OFFSET_TABLE_\n" ); + test( "not in\n" ); + test( + "isatty\n" + "junk on end.\n" + ); + + return 0; +} + +#ifdef _____OUTPUT_____ +ACCEPT +ACCEPT +ACCEPT +FAIL +FAIL +#endif diff --git a/test/strings2.h b/test/strings2.h new file mode 100644 index 0000000..1cf0ce9 --- /dev/null +++ b/test/strings2.h @@ -0,0 +1,9 @@ +#ifndef _STRINGS1_H +#define _STRINGS1_H + +struct strs +{ + int cs; +}; + +#endif diff --git a/test/strings2.rl b/test/strings2.rl new file mode 100644 index 0000000..ad4dd40 --- /dev/null +++ b/test/strings2.rl @@ -0,0 +1,1349 @@ +/* + * @LANG: c + * @ALLOW_GENFLAGS: -T0 -T1 -F0 -F1 -P + * @ALLOW_MINFLAGS: -n -m -l + */ + +#include <string.h> +#include <stdio.h> + +#include "strings2.h" + +%%{ + machine strs; + variable curstate fsm->cs; + + main := + "/lib/ld-linux.so.2\n" | + "libstdc++-libc6.2-2.so.3\n" | + "cerr\n" | + "__cp_push_exception\n" | + "_DYNAMIC\n" | + "endl__FR7ostream\n" | + "__ls__7ostreamc\n" | + "_._9exception\n" | + "__vt_9bad_alloc\n" | + "__rtti_user\n" | + "__ls__7ostreamPFR7ostream_R7ostream\n" | + "__rtti_si\n" | + "_init\n" | + "bad__C3ios\n" | + "__throw\n" | + "__ls__7ostreamPCc\n" | + "__deregister_frame_info\n" | + "terminate__Fv\n" | + "__ls__7ostreamb\n" | + "__ls__7ostreami\n" | + "__8ofstreamiPCcii\n" | + "__builtin_vec_new\n" | + "_fini\n" | + "__9exception\n" | + "__builtin_vec_delete\n" | + "_GLOBAL_OFFSET_TABLE_\n" | + "__vt_9exception\n" | + "__nw__FUiPv\n" | + "_._9bad_alloc\n" | + "__builtin_delete\n" | + "__builtin_new\n" | + "cout\n" | + "__register_frame_info\n" | + "__eh_alloc\n" | + "__gmon_start__\n" | + "libm.so.6\n" | + "libc.so.6\n" | + "strcpy\n" | + "stdout\n" | 
+ "memmove\n" | + "memcpy\n" | + "malloc\n" | + "strtoul\n" | + "fprintf\n" | + "stdin\n" | + "ferror\n" | + "strncpy\n" | + "strcasecmp\n" | + "realloc\n" | + "_IO_getc\n" | + "fread\n" | + "memset\n" | + "clearerr\n" | + "__assert_fail\n" | + "strcmp\n" | + "stderr\n" | + "fwrite\n" | + "__errno_location\n" | + "exit\n" | + "fopen\n" | + "atoi\n" | + "_IO_stdin_used\n" | + "__libc_start_main\n" | + "strlen\n" | + "free\n" | + "_edata\n" | + "__bss_start\n" | + "_end\n" | + "GLIBC_2.1\n" | + "GLIBC_2.0\n" | + "PTRh\n" | + "QVhL\n" | + "<WVS\n" | + "LWVS\n" | + "PHRW\n" | + "<WVS\n" | + "\WVS\n" | + ",WVS\n" | + "@Phl\n" | + "<WVS\n" | + "jZjA\n" | + "jzja\n" | + "j9j0\n" | + "j9j0\n" | + "jZjA\n" | + "jzja\n" | + "jzja\n" | + "jZjA\n" | + "j~j!\n" | + "j~j \n" | + "j/j!\n" | + "j@j:\n" | + "j`j[\n" | + "j~j{\n" | + "j9j0\n" | + "jFjA\n" | + "jfja\n" | + ",WVS\n" | + ",WVS\n" | + ";C<|\n" | + "<WVS\n" | + "C ;C\n" | + "C$;C\n" | + "C$;C\n" | + "C ;C\n" | + ",WVS\n" | + ";E uF\n" | + "P ;U\n" | + "P ;U\n" | + "E$fP\n" | + "E$fP\n" | + "E$fP\n" | + "E$fP\n" | + "E$fP\n" | + "E$fP\n" | + "E$fP\n" | + "E$fP\n" | + "u!h@\n" | + "PHRj\n" | + "PHRj\n" | + "P\ U\n" | + "j]hY\n" | + "johY\n" | + "PHRj\n" | + "PHRj\n" | + "E fPj\n" | + "E fP\n" | + "E fP\n" | + "E fP\n" | + "E fP\n" | + "E fP\n" | + "E fPj\n" | + "t$h`\n" | + "F ;C } \n" | + "F ;C ~ \n" | + "@X:BXt)\n" | + "\WVS\n" | + "\WVS\n" | + "PPRS\n" | + "F ;C } \n" | + "F ;C ~ \n" | + "@X:BXt)\n" | + ";H(}:\n" | + "@ fP\n" | + ";P |\n" | + "<WVS\n" | + ";P |\n" | + "bad_alloc\n" | + "usage: ragel [options] file\n" | + "general:\n" | + " -h, -H, -? 
Disply this usage.\n" | + " -o <file> Write output to <file>.\n" | + " -s Print stats on the compiled fsm.\n" | + " -f Dump the final fsm.\n" | + "fsm minimization:\n" | + " -n No minimization (default).\n" | + " -m Find the minimal fsm accepting the language.\n" | + "generated code language:\n" | + " -c Generate c code (default).\n" | + " -C Generate c++ code.\n" | + "generated code style:\n" | + " -T0 Generate a table driven fsm (default).\n" | + " -T1 Generate a faster table driven fsm.\n" | + " -S0 Generate a switch driven fsm.\n" | + " -G0 Generate a goto driven fsm.\n" | + " -G1 Generate a faster goto driven fsm.\n" | + " -G2 Generate a really fast goto driven fsm.\n" | + "char * FileNameFromStem(char *, char *)\n" | + "main.cpp\n" | + "len > 0\n" | + "main\n" | + "ragel: main graph not defined\n" | + "graph states: \n" | + "graph transitions: \n" | + "machine states: \n" | + "machine functions: \n" | + "function array: \n" | + "T:S:G:Cco:senmabjkfhH?-:\n" | + "ragel: zero length output file name given\n" | + "ragel: output file already given\n" | + "ragel: invalid param specified (try -h for a list of options)\n" | + "help\n" | + "ragel: zero length input file name given\n" | + "ragel: input file already given\n" | + "ragel: warning: -e given but minimization is not enabled\n" | + "ragel: no input file (try -h for a list of options)\n" | + " for reading\n" | + "ragel: could not open \n" | + " for writing\n" | + "ragel: error opening \n" | + " * Parts of this file are copied from Ragel source covered by the GNU\n" | + " * GPL. As a special exception, you may use the parts of this file copied\n" | + " * from Ragel source without restriction. 
The remainder is derived from\n" | + "bad_alloc\n" | + "%s:%i: unterminated literal\n" | + "%s:%i: unterminated comment\n" | + "%s:%i: bad character in literal\n" | + "fatal flex scanner internal error--no action found\n" | + "fatal flex scanner internal error--end of buffer missed\n" | + "fatal error - scanner input buffer overflow\n" | + "input in flex scanner failed\n" | + "out of dynamic memory in yy_create_buffer()\n" | + "out of dynamic memory in yy_scan_buffer()\n" | + "out of dynamic memory in yy_scan_bytes()\n" | + "bad buffer in yy_scan_bytes()\n" | + "bad_alloc\n" | + "%s:%i: warning: range gives null fsm\n" | + "%s:%i: warning: literal used in range is not of length 1, using 0x%x\n" | + "%s:%i: warning: overflow in byte constant\n" | + "parse error\n" | + "parser stack overflow\n" | + "%s:%i: %s\n" | + "bad_alloc\n" | + "extend\n" | + "ascii\n" | + "alpha\n" | + "digit\n" | + "alnum\n" | + "lower\n" | + "upper\n" | + "cntrl\n" | + "graph\n" | + "print\n" | + "punct\n" | + "space\n" | + "xdigit\n" | + "struct Fsm * FactorWithAugNode::Walk()\n" | + "parsetree.cpp\n" | + "false\n" | + "bad_alloc\n" | + "xx []()\n" | + " df \n" | + "StartState: \n" | + "Final States:\n" | + "void FsmGraph<State,int,Trans>::AttachStates(State *, State *, Trans *, FsmKeyType, int)\n" | + "rlfsm/fsmattach.cpp\n" | + "trans->toState == __null\n" | + "trans->fromState == __null\n" | + "void FsmGraph<State,int,Trans>::DetachStates(State *, State *, Trans *, FsmKeyType, int)\n" | + "trans->toState == to\n" | + "trans->fromState == from\n" | + "inTel != __null\n" | + "void Vector<BstMapEl<int,int>,ResizeExpn>::setAs(const Vector<BstMapEl<int,int>,ResizeExpn> &)\n" | + "aapl/vectcommon.h\n" | + "&v != this\n" | + "void FsmGraph<State,int,Trans>::ChangeRangeLowerKey(Trans *, int, int)\n" | + "inRangeEl != __null\n" | + "void FsmGraph<State,int,Trans>::IsolateStartState()\n" | + "rlfsm/fsmgraph.cpp\n" | + "md.stateDict.nodeCount == 0\n" | + "md.stfil.listLength == 0\n" | + "struct 
State * FsmGraph<State,int,Trans>::DetachState(State *)\n" | + "fromTel != __null\n" | + "struct Trans * FsmGraph<State,int,Trans>::AttachStates(State *, State *, FsmKeyType, int, int)\n" | + "outTel != __null\n" | + "outTel1 != __null\n" | + "from->defOutTrans == __null\n" | + "void FsmGraph<State,int,Trans>::VerifyOutFuncs()\n" | + "state->outTransFuncTable.tableLength == 0\n" | + "!state->isOutPriorSet\n" | + "state->outPriority == 0\n" | + "void FsmGraph<State,int,Trans>::VerifyIntegrity()\n" | + "rlfsm/fsmbase.cpp\n" | + "outIt.trans->fromState == state\n" | + "inIt.trans->toState == state\n" | + "static int FsmTrans<State,Trans,int,CmpOrd<int> >::ComparePartPtr(FsmTrans<State,Trans,int,CmpOrd<int> > *, FsmTrans<State,Trans,int,CmpOrd<int> > *)\n" | + "rlfsm/fsmstate.cpp\n" | + "false\n" | + "void FsmGraph<State,int,Trans>::InTransMove(State *, State *)\n" | + "dest != src\n" | + "static bool FsmTrans<State,Trans,int,CmpOrd<int> >::ShouldMarkPtr(MarkIndex<State> &, FsmTrans<State,Trans,int,CmpOrd<int> > *, FsmTrans<State,Trans,int,CmpOrd<int> > *)\n" | + "bad_alloc\n" | + "10FsmCodeGen\n" | + "bad_alloc\n" | + " case \n" | + "break;}\n" | + "unsigned char\n" | + "unsigned short\n" | + "unsigned int\n" | + "{0, \n" | + "/* Forward dec state for the transition structure. */\n" | + "struct \n" | + "StateStruct;\n" | + "/* A single transition. */\n" | + "struct \n" | + "TransStruct\n" | + " struct \n" | + "StateStruct *toState;\n" | + " int *funcs;\n" | + "typedef struct \n" | + "TransStruct \n" | + "Trans;\n" | + "/* A single state. */\n" | + "struct \n" | + "StateStruct\n" | + " int lowIndex;\n" | + " int highIndex;\n" | + " void *transIndex;\n" | + " unsigned int dflIndex;\n" | + " int *outFuncs;\n" | + " int isFinState;\n" | + "typedef struct \n" | + "StateStruct \n" | + "State;\n" | + "/* Only non-static data: current state. 
*/\n" | + "struct \n" | + "Struct\n" | + "State *curState;\n" | + " int accept;\n" | + "typedef struct \n" | + "Struct \n" | + "/* Init the fsm. */\n" | + "void \n" | + "Init( \n" | + " *fsm );\n" | + "/* Execute some chunk of data. */\n" | + "void \n" | + "Execute( \n" | + " *fsm, char *data, int dlen );\n" | + "/* Indicate to the fsm tha there is no more data. */\n" | + "void \n" | + "Finish( \n" | + " *fsm );\n" | + "/* Did the machine accept? */\n" | + "int \n" | + "Accept( \n" | + " *fsm );\n" | + "#define f \n" | + "#define s \n" | + "#define i \n" | + "#define t \n" | + "/* The array of functions. */\n" | + "#if \n" | + "static int \n" | + "_f[] = {\n" | + "#endif\n" | + "/* The array of indicies into the transition array. */\n" | + "#if \n" | + "static \n" | + "_i[] = {\n" | + "#endif\n" | + "/* The aray of states. */\n" | + "static \n" | + "State \n" | + "_s[] = {\n" | + "/* The array of transitions. */\n" | + "static \n" | + "Trans \n" | + "_t[] = {\n" | + "/* The start state. */\n" | + "static \n" | + "State *\n" | + "_startState = s+\n" | + "#undef f\n" | + "#undef s\n" | + "#undef i\n" | + "#undef t\n" | + "* Execute functions pointed to by funcs until the null function is found. \n" | + "inline static void \n" | + "ExecFuncs( \n" | + " *fsm, int *funcs, char *p )\n" | + " int len = *funcs++;\n" | + " while ( len-- > 0 ) {\n" | + " switch ( *funcs++ ) {\n" | + " * Init the fsm to a runnable state.\n" | + "void \n" | + " *fsm )\n" | + " fsm->curState = \n" | + "_startState;\n" | + " fsm->accept = 0;\n" | + " * Did the fsm accept? \n" | + "int \n" | + " *fsm )\n" | + " return fsm->accept;\n" | + " * Execute the fsm on some chunk of data. 
\n" | + "void \n" | + " *fsm, char *data, int dlen )\n" | + " char *p = data;\n" | + " int len = dlen;\n" | + "State *cs = fsm->curState;\n" | + " for ( ; len > 0; p++, len-- ) {\n" | + " int c = (unsigned char) *p;\n" | + "Trans *trans;\n" | + " if ( cs == 0 )\n" | + " goto finished;\n" | + " /* If the character is within the index bounds then get the\n" | + " * transition for it. If it is out of the transition bounds\n" | + " * we will use the default transition. */\n" | + " if ( cs->lowIndex <= c && c < cs->highIndex ) {\n" | + " /* Use the index to look into the transition array. */\n" | + " trans = \n" | + "_t + \n" | + " ((\n" | + "*)cs->transIndex)[c - cs->lowIndex];\n" | + " else {\n" | + " /* Use the default index as the char is out of range. */\n" | + " trans = \n" | + "_t + cs->dflIndex;\n" | + " /* If there are functions for this transition then execute them. */\n" | + " if ( trans->funcs != 0 )\n" | + "ExecFuncs( fsm, trans->funcs, p );\n" | + " /* Move to the new state. */\n" | + " cs = trans->toState;\n" | + "finished:\n" | + " fsm->curState = cs;\n" | + " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" | + "void \n" | + " *fsm )\n" | + "State *cs = fsm->curState;\n" | + " if ( cs != 0 && cs->isFinState ) {\n" | + " /* If finishing in a final state then execute the\n" | + " * out functions for it. (if any). */\n" | + " if ( cs->outFuncs != 0 )\n" | + "ExecFuncs( fsm, cs->outFuncs, 0 );\n" | + " fsm->accept = 1;\n" | + " else {\n" | + " /* If we are not in a final state then this\n" | + " * is an error. Move to the error state. */\n" | + " fsm->curState = 0;\n" | + "class \n" | + "public:\n" | + " /* Forward dec state for the transition structure. */\n" | + " struct State;\n" | + " /* A single transition. */\n" | + " struct Trans\n" | + " State *toState;\n" | + " int *funcs;\n" | + " /* A single state. 
*/\n" | + " struct State\n" | + " int lowIndex;\n" | + " int highIndex;\n" | + " void *transIndex;\n" | + " unsigned int dflIndex;\n" | + " int *outFuncs;\n" | + " int isFinState;\n" | + " /* Constructor. */\n" | + " void Init( );\n" | + " /* Execute some chunk of data. */\n" | + " void Execute( char *data, int dlen );\n" | + " /* Indicate to the fsm tha there is no more data. */\n" | + " void Finish( );\n" | + " /* Did the machine accept? */\n" | + " int Accept( );\n" | + " State *curState;\n" | + " int accept;\n" | + " inline void ExecFuncs( int *funcs, char *p );\n" | + "/* The array of functions. */\n" | + "#if \n" | + "::State \n" | + "/* The array of trainsitions. */\n" | + "static \n" | + "::Trans \n" | + "/* The start state. */\n" | + "static \n" | + "::State *\n" | + " * Execute functions pointed to by funcs until the null function is found. \n" | + "inline void \n" | + "::ExecFuncs( int *funcs, char *p )\n" | + " int len = *funcs++;\n" | + " while ( len-- > 0 ) {\n" | + " switch ( *funcs++ ) {\n" | + " * Constructor\n" | + " Init();\n" | + "Init\n" | + "void \n" | + "::Init( )\n" | + " curState = \n" | + "_startState;\n" | + " accept = 0;\n" | + "::Accept( )\n" | + " return accept;\n" | + "::Execute( char *data, int dlen )\n" | + " char *p = data;\n" | + " int len = dlen;\n" | + " State *cs = curState;\n" | + " for ( ; len > 0; p++, len-- ) {\n" | + " int c = (unsigned char)*p;\n" | + " Trans *trans;\n" | + " if ( cs == 0 )\n" | + " goto finished;\n" | + " /* If the character is within the index bounds then get the\n" | + " * transition for it. If it is out of the transition bounds\n" | + " * we will use the default transition. */\n" | + " if ( cs->lowIndex <= c && c < cs->highIndex ) {\n" | + " /* Use the index to look into the transition array. */\n" | + " trans = \n" | + "_t + cs->dflIndex;\n" | + " /* If there are functions for this transition then execute them. 
*/\n" | + " if ( trans->funcs != 0 )\n" | + " ExecFuncs( trans->funcs, p );\n" | + " /* Move to the new state. */\n" | + " cs = trans->toState;\n" | + "finished:\n" | + " curState = cs;\n" | + "::Finish( )\n" | + " State *cs = curState;\n" | + " if ( cs != 0 && cs->isFinState ) {\n" | + " /* If finishing in a final state then execute the\n" | + " * out functions for it. (if any). */\n" | + " if ( cs->outFuncs != 0 )\n" | + " ExecFuncs( cs->outFuncs, 0 );\n" | + " accept = 1;\n" | + " else {\n" | + " /* If we are not in a final state then this\n" | + " * is an error. Move to the error state. */\n" | + " curState = 0;\n" | + "10TabCodeGen\n" | + "11CTabCodeGen\n" | + "12CCTabCodeGen\n" | + "10FsmCodeGen\n" | + "bad_alloc\n" | + " case \n" | + " break;\n" | + "/* Forward dec state for the transition structure. */\n" | + "struct \n" | + "StateStruct;\n" | + "/* A single transition. */\n" | + "struct \n" | + "TransStruct\n" | + " struct \n" | + "StateStruct *toState;\n" | + " int funcs;\n" | + "typedef struct \n" | + "TransStruct \n" | + "Trans;\n" | + "/* A single state. */\n" | + "struct \n" | + "StateStruct\n" | + " int lowIndex;\n" | + " int highIndex;\n" | + " void *transIndex;\n" | + " int dflIndex;\n" | + " int outFuncs;\n" | + " int isFinState;\n" | + "typedef struct \n" | + "StateStruct \n" | + "State;\n" | + "/* Only non-static data: current state. */\n" | + "struct \n" | + "Struct\n" | + "State *curState;\n" | + " int accept;\n" | + "typedef struct \n" | + "Struct \n" | + "/* Init the fsm. */\n" | + "void \n" | + "Init( \n" | + " *fsm );\n" | + "/* Execute some chunk of data. */\n" | + "void \n" | + "Execute( \n" | + " *fsm, char *data, int dlen );\n" | + "/* Indicate to the fsm tha there is no more data. */\n" | + "void \n" | + "Finish( \n" | + " *fsm );\n" | + "/* Did the machine accept? */\n" | + "int \n" | + "Accept( \n" | + " *fsm );\n" | + "#define s \n" | + "#define i \n" | + "#define t \n" | + "/* The array of indicies into the transition array. 
*/\n" | + "#if \n" | + "static \n" | + "_i[] = {\n" | + "#endif\n" | + "/* The aray of states. */\n" | + "static \n" | + "State \n" | + "_s[] = {\n" | + "/* The array of trainsitions. */\n" | + "static \n" | + "Trans \n" | + "_t[] = {\n" | + "/* The start state. */\n" | + "static \n" | + "State *\n" | + "_startState = s+\n" | + "#undef f\n" | + "#undef s\n" | + "#undef i\n" | + "#undef t\n" | + "/***************************************************************************\n" | + " * Execute functions pointed to by funcs until the null function is found. \n" | + "inline static void \n" | + "ExecFuncs( \n" | + " *fsm, int funcs, char *p )\n" | + " switch ( funcs ) {\n" | + "/****************************************\n" | + "Init\n" | + "void \n" | + " *fsm )\n" | + " fsm->curState = \n" | + "_startState;\n" | + " fsm->accept = 0;\n" | + "/****************************************\n" | + "Accept\n" | + " * Did the fsm accept? \n" | + "int \n" | + " *fsm )\n" | + " return fsm->accept;\n" | + "/**********************************************************************\n" | + " * Execute the fsm on some chunk of data. \n" | + "void \n" | + " *fsm, char *data, int dlen )\n" | + " char *p = data;\n" | + " int len = dlen;\n" | + "State *cs = fsm->curState;\n" | + " for ( ; len > 0; p++, len-- ) {\n" | + " int c = (unsigned char)*p;\n" | + "Trans *trans;\n" | + " if ( cs == 0 )\n" | + " goto finished;\n" | + " /* If the character is within the index bounds then get the\n" | + " * transition for it. If it is out of the transition bounds\n" | + " * we will use the default transition. */\n" | + " if ( cs->lowIndex <= c && c < cs->highIndex ) {\n" | + " /* Use the index to look into the transition array. */\n" | + " trans = \n" | + "_t + \n" | + " ((\n" | + "*)cs->transIndex)[c - cs->lowIndex];\n" | + " else {\n" | + " /* Use the default index as the char is out of range. 
*/\n" | + " trans = \n" | + "_t + cs->dflIndex;\n" | + " /* If there are functions for this transition then execute them. */\n" | + " if ( trans->funcs >= 0 )\n" | + "ExecFuncs( fsm, trans->funcs, p );\n" | + " /* Move to the new state. */\n" | + " cs = trans->toState;\n" | + "finished:\n" | + " fsm->curState = cs;\n" | + "/**********************************************************************\n" | + "Finish\n" | + " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" | + "void \n" | + " *fsm )\n" | + "State *cs = fsm->curState;\n" | + " if ( cs != 0 && cs->isFinState ) {\n" | + " /* If finishing in a final state then execute the\n" | + " * out functions for it. (if any). */\n" | + " if ( cs->outFuncs != 0 )\n" | + "ExecFuncs( fsm, cs->outFuncs, 0 );\n" | + " fsm->accept = 1;\n" | + " else {\n" | + " /* If we are not in a final state then this\n" | + " * is an error. Move to the error state. */\n" | + " fsm->curState = 0;\n" | + "class \n" | + "public:\n" | + " /* Function and index type. */\n" | + " typedef int Func;\n" | + " /* Forward dec state for the transition structure. */\n" | + " struct State;\n" | + " /* A single transition. */\n" | + " struct Trans\n" | + " State *toState;\n" | + " int funcs;\n" | + " /* A single state. */\n" | + " struct State\n" | + " int lowIndex;\n" | + " int highIndex;\n" | + " void *transIndex;\n" | + " int dflIndex;\n" | + " int outFuncs;\n" | + " int isFinState;\n" | + " /* Constructor. */\n" | + " void Init( );\n" | + " /* Execute some chunk of data. */\n" | + " void Execute( char *data, int dlen );\n" | + " /* Indicate to the fsm tha there is no more data. */\n" | + " void Finish( );\n" | + " /* Did the machine accept? 
*/\n" | + " int Accept( );\n" | + " State *curState;\n" | + " int accept;\n" | + " inline void ExecFuncs( int funcs, char *p );\n" | + "::State \n" | + "::Trans \n" | + "::State *\n" | + "/***************************************************************************\n" | + " * Execute functions pointed to by funcs until the null function is found. \n" | + "inline void \n" | + "::ExecFuncs( int funcs, char *p )\n" | + " switch ( funcs ) {\n" | + "/****************************************\n" | + " * Constructor\n" | + " Init();\n" | + "/****************************************\n" | + "::Init( )\n" | + " curState = \n" | + "_startState;\n" | + " accept = 0;\n" | + "/****************************************\n" | + " * Did the fsm accept? \n" | + "int \n" | + "::Accept( )\n" | + " return accept;\n" | + "/**********************************************************************\n" | + " * Execute the fsm on some chunk of data. \n" | + "void \n" | + "::Execute( char *data, int dlen )\n" | + " char *p = data;\n" | + " int len = dlen;\n" | + " State *cs = curState;\n" | + " for ( ; len > 0; p++, len-- ) {\n" | + " int c = (unsigned char)*p;\n" | + " Trans *trans;\n" | + " if ( cs == 0 )\n" | + " goto finished;\n" | + " /* If the character is within the index bounds then get the\n" | + " * transition for it. If it is out of the transition bounds\n" | + " * we will use the default transition. */\n" | + " if ( cs->lowIndex <= c && c < cs->highIndex ) {\n" | + " /* Use the index to look into the transition array. */\n" | + " trans = \n" | + "_t + cs->dflIndex;\n" | + " /* If there are functions for this transition then execute them. */\n" | + " if ( trans->funcs != 0 )\n" | + " ExecFuncs( trans->funcs, p );\n" | + " /* Move to the new state. */\n" | + " cs = trans->toState;\n" | + "finished:\n" | + " curState = cs;\n" | + "/**********************************************************************\n" | + " * Indicate to the fsm that the input is done. 
Does cleanup tasks.\n" | + "void \n" | + "::Finish( )\n" | + " State *cs = curState;\n" | + " if ( cs != 0 && cs->isFinState ) {\n" | + " /* If finishing in a final state then execute the\n" | + " * out functions for it. (if any). */\n" | + " if ( cs->outFuncs != 0 )\n" | + " ExecFuncs( cs->outFuncs, 0 );\n" | + " accept = 1;\n" | + " else {\n" | + " /* If we are not in a final state then this\n" | + " * is an error. Move to the error state. */\n" | + " curState = 0;\n" | + "11FTabCodeGen\n" | + "12CFTabCodeGen\n" | + "13CCFTabCodeGen\n" | + "bad_alloc\n" | + "cs = -1; \n" | + "cs = \n" | + "break;\n" | + " switch( cs ) {\n" | + " case \n" | + " switch ( c ) {\n" | + "case \n" | + "default: \n" | + " }\n" | + " break;\n" | + " switch( cs ) {\n" | + "accept = 1; \n" | + "/* Only non-static data: current state. */\n" | + "struct \n" | + "Struct\n" | + " int curState;\n" | + " int accept;\n" | + "typedef struct \n" | + "Struct \n" | + "/* Init the fsm. */\n" | + "void \n" | + "Init( \n" | + " *fsm );\n" | + "/* Execute some chunk of data. */\n" | + "void \n" | + "Execute( \n" | + " *fsm, char *data, int dlen );\n" | + "/* Indicate to the fsm tha there is no more data. */\n" | + "void \n" | + "Finish( \n" | + " *fsm );\n" | + "/* Did the machine accept? */\n" | + "int \n" | + "Accept( \n" | + " *fsm );\n" | + "/* The start state. */\n" | + "static int \n" | + "_startState = \n" | + "/****************************************\n" | + "Init\n" | + "void \n" | + " *fsm )\n" | + " fsm->curState = \n" | + "_startState;\n" | + " fsm->accept = 0;\n" | + "/**********************************************************************\n" | + " * Execute the fsm on some chunk of data. 
\n" | + "void \n" | + " *fsm, char *data, int dlen )\n" | + " char *p = data;\n" | + " int len = dlen;\n" | + " int cs = fsm->curState;\n" | + " for ( ; len > 0; p++, len-- ) {\n" | + " unsigned char c = (unsigned char)*p;\n" | + " fsm->curState = cs;\n" | + "/**********************************************************************\n" | + "Finish\n" | + " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" | + "void \n" | + " *fsm )\n" | + " int cs = fsm->curState;\n" | + " int accept = 0;\n" | + " fsm->accept = accept;\n" | + "/*******************************************************\n" | + "Accept\n" | + " * Did the machine accept?\n" | + "int \n" | + " *fsm )\n" | + " return fsm->accept;\n" | + "/* Only non-static data: current state. */\n" | + "class \n" | + "public:\n" | + " /* Init the fsm. */\n" | + " void Init( );\n" | + " /* Execute some chunk of data. */\n" | + " void Execute( char *data, int dlen );\n" | + " /* Indicate to the fsm tha there is no more data. */\n" | + " void Finish( );\n" | + " /* Did the machine accept? */\n" | + " int Accept( );\n" | + " int curState;\n" | + " int accept;\n" | + " /* The start state. */\n" | + " static int startState;\n" | + "/* The start state. */\n" | + "int \n" | + "::startState = \n" | + " Init();\n" | + "/****************************************\n" | + "::Init\n" | + "void \n" | + "::Init( )\n" | + " curState = startState;\n" | + " accept = 0;\n" | + "::Execute( char *data, int dlen )\n" | + " char *p = data;\n" | + " int len = dlen;\n" | + " int cs = curState;\n" | + " for ( ; len > 0; p++, len-- ) {\n" | + " unsigned char c = (unsigned char)*p;\n" | + " curState = cs;\n" | + "/**********************************************************************\n" | + "::Finish\n" | + " * Indicate to the fsm that the input is done. 
Does cleanup tasks.\n" | + "void \n" | + "::Finish( )\n" | + " int cs = curState;\n" | + " int accept = 0;\n" | + " this->accept = accept;\n" | + "/*******************************************************\n" | + "::Accept\n" | + " * Did the machine accept?\n" | + "int \n" | + "::Accept( )\n" | + " return accept;\n" | + "10SelCodeGen\n" | + "11CSelCodeGen\n" | + "12CCSelCodeGen\n" | + "10FsmCodeGen\n" | + "bad_alloc\n" | + "goto tr\n" | + "goto st\n" | + "goto err;\n" | + " case \n" | + "break;}\n" | + ": goto st\n" | + " case \n" | + " default: return;\n" | + " goto st\n" | + " if ( --len == 0 )\n" | + " goto out\n" | + " switch( (alph) *++p ) {\n" | + "case \n" | + " default: \n" | + " return;\n" | + "curState = \n" | + " switch( cs ) {\n" | + "accept = 1; \n" | + "break;\n" | + "err:\n" | + "curState = -1;\n" | + ", p );\n" | + "ExecFuncs( fsm, f+\n" | + "fsm->\n" | + "/* Only non-static data: current state. */\n" | + "struct \n" | + "Struct\n" | + " int curState;\n" | + " int accept;\n" | + "typedef struct \n" | + "Struct \n" | + "/* Init the fsm. */\n" | + "void \n" | + "Init( \n" | + " *fsm );\n" | + "/* Execute some chunk of data. */\n" | + "void \n" | + "Execute( \n" | + " *fsm, char *data, int dlen );\n" | + "/* Indicate to the fsm tha there is no more data. */\n" | + "void \n" | + "Finish( \n" | + " *fsm );\n" | + "/* Did the machine accept? */\n" | + "int \n" | + "Accept( \n" | + " *fsm );\n" | + "/* The start state. */\n" | + "static int \n" | + "_startState = \n" | + "#define f \n" | + "#define alph unsigned char\n" | + "/* The array of functions. */\n" | + "#if \n" | + "static int \n" | + "_f[] = {\n" | + "#endif\n" | + "/****************************************\n" | + "Init\n" | + "void \n" | + " *fsm )\n" | + " fsm->curState = \n" | + "_startState;\n" | + " fsm->accept = 0;\n" | + "/***************************************************************************\n" | + " * Function exection. 
We do not inline this as in tab\n" | + " * code gen because if we did, we might as well just expand \n" | + " * the function as in the faster goto code generator.\n" | + "static void \n" | + "ExecFuncs( \n" | + " *fsm, int *funcs, char *p )\n" | + " int len = *funcs++;\n" | + " while ( len-- > 0 ) {\n" | + " switch ( *funcs++ ) {\n" | + "/**********************************************************************\n" | + " * Execute the fsm on some chunk of data. \n" | + "void \n" | + " *fsm, char *data, int dlen )\n" | + " /* Prime these to one back to simulate entering the \n" | + " * machine on a transition. */ \n" | + " register char *p = data - 1;\n" | + " register int len = dlen + 1;\n" | + " /* Switch statment to enter the machine. */\n" | + " switch ( \n" | + "curState ) {\n" | + "/**********************************************************************\n" | + " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" | + "void \n" | + " *fsm )\n" | + " int cs = fsm->curState;\n" | + " int accept = 0;\n" | + " fsm->accept = accept;\n" | + "/*******************************************************\n" | + " * Did the machine accept?\n" | + "int \n" | + " *fsm )\n" | + " return fsm->accept;\n" | + "#undef f\n" | + "#undef alph\n" | + " ExecFuncs( f+\n" | + "/* Only non-static data: current state. */\n" | + "class \n" | + "public:\n" | + " /* Init the fsm. */\n" | + " void Init( );\n" | + " /* Execute some chunk of data. */\n" | + " void Execute( char *data, int dlen );\n" | + " /* Indicate to the fsm tha there is no more data. */\n" | + " void Finish( );\n" | + " /* Did the machine accept? */\n" | + " int Accept( );\n" | + " int curState;\n" | + " int accept;\n" | + " /* The start state. */\n" | + " static int startState;\n" | + " /* Function exection. We do not inline this as in tab code gen\n" | + " * because if we did, we might as well just expand the function \n" | + " * as in the faster goto code generator. 
*/\n" | + " void ExecFuncs( int *funcs, char * );\n" | + "/* The start state. */\n" | + "int \n" | + "::startState = \n" | + "/* some defines to lessen the code size. */\n" | + "#define f \n" | + "#endif\n" | + "/****************************************\n" | + " * Make sure the fsm is initted.\n" | + " Init();\n" | + "/****************************************\n" | + " * Initialize the fsm.\n" | + "void \n" | + "::Init( )\n" | + " curState = startState;\n" | + " accept = 0;\n" | + "/***************************************************************************\n" | + " * Execute functions pointed to by funcs until the null function is found. \n" | + "void \n" | + "::ExecFuncs( int *funcs, char *p )\n" | + " int len = *funcs++;\n" | + " while ( len-- > 0 ) {\n" | + " switch ( *funcs++ ) {\n" | + "/**********************************************************************\n" | + " * Execute the fsm on some chunk of data. \n" | + "void \n" | + "::Execute( char *data, int dlen )\n" | + " /* Prime these to one back to simulate entering the \n" | + " * machine on a transition. */ \n" | + " register char *p = data - 1;\n" | + " register int len = dlen + 1;\n" | + " /* Switch statment to enter the machine. */\n" | + " switch ( curState ) {\n" | + "/**********************************************************************\n" | + "::Finish\n" | + " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" | + "void \n" | + "::Finish( )\n" | + " int cs = curState;\n" | + " int accept = 0;\n" | + " this->accept = accept;\n" | + "/*******************************************************\n" | + "::Accept\n" | + " * Did the machine accept?\n" | + "int \n" | + "::Accept( )\n" | + " return accept;\n" | + "#undef f\n" | + "#undef alph\n" | + "11GotoCodeGen\n" | + "12CGotoCodeGen\n" | + "13CCGotoCodeGen\n" | + "10FsmCodeGen\n" | + "bad_alloc\n" | + " case \n" | + " break;\n" | + ", p );\n" | + "ExecFuncs( fsm, \n" | + "fsm->\n" | + "/* Only non-static data: current state. 
*/\n" | + "struct \n" | + "Struct\n" | + " int curState;\n" | + " int accept;\n" | + "typedef struct \n" | + "Struct \n" | + "/* Init the fsm. */\n" | + "void \n" | + "Init( \n" | + " *fsm );\n" | + "/* Execute some chunk of data. */\n" | + "void \n" | + "Execute( \n" | + " *fsm, char *data, int dlen );\n" | + "/* Indicate to the fsm tha there is no more data. */\n" | + "void \n" | + "Finish( \n" | + " *fsm );\n" | + "/* Did the machine accept? */\n" | + "int \n" | + "Accept( \n" | + " *fsm );\n" | + "/* The start state. */\n" | + "static int \n" | + "_startState = \n" | + "/****************************************\n" | + "Init\n" | + "void \n" | + " *fsm )\n" | + " fsm->curState = \n" | + "_startState;\n" | + " fsm->accept = 0;\n" | + "/***************************************************************************\n" | + " * Function exection. We do not inline this as in tab\n" | + " * code gen because if we did, we might as well just expand \n" | + " * the function as in the faster goto code generator.\n" | + "static void \n" | + "ExecFuncs( \n" | + " *fsm, int func, char *p )\n" | + " switch ( func ) {\n" | + "#define alph unsigned char\n" | + "/**********************************************************************\n" | + " * Execute the fsm on some chunk of data. \n" | + "void \n" | + " *fsm, char *data, int dlen )\n" | + " /* Prime these to one back to simulate entering the \n" | + " * machine on a transition. */ \n" | + " register char *p = data-1;\n" | + " register int len = dlen+1;\n" | + " /* Switch statment to enter the machine. */\n" | + " switch ( \n" | + "curState ) {\n" | + "/**********************************************************************\n" | + "Finish\n" | + " * Indicate to the fsm that the input is done. 
Does cleanup tasks.\n" | + "void \n" | + " *fsm )\n" | + " int cs = fsm->curState;\n" | + " int accept = 0;\n" | + " fsm->accept = accept;\n" | + "/*******************************************************\n" | + "Accept\n" | + " * Did the machine accept?\n" | + "int \n" | + " *fsm )\n" | + " return fsm->accept;\n" | + "#undef alph\n" | + " ExecFuncs( \n" | + "/* Only non-static data: current state. */\n" | + "class \n" | + "public:\n" | + " /* Init the fsm. */\n" | + " void Init( );\n" | + " /* Execute some chunk of data. */\n" | + " void Execute( char *data, int dlen );\n" | + " /* Indicate to the fsm tha there is no more data. */\n" | + " void Finish( );\n" | + " /* Did the machine accept? */\n" | + " int Accept( );\n" | + " int curState;\n" | + " int accept;\n" | + " /* The start state. */\n" | + " static int startState;\n" | + " /* Function exection. We do not inline this as in tab code gen\n" | + " * because if we did, we might as well just expand the function \n" | + " * as in the faster goto code generator. */\n" | + " void ExecFuncs( int func, char *p );\n" | + "/* The start state. */\n" | + "int \n" | + "::startState = \n" | + " Init();\n" | + "/****************************************\n" | + "::Init\n" | + "void \n" | + "::Init( )\n" | + " curState = startState;\n" | + " accept = 0;\n" | + "/***************************************************************************\n" | + " * Execute functions pointed to by funcs until the null function is found. \n" | + "void \n" | + "::ExecFuncs( int func, char *p )\n" | + " switch ( func ) {\n" | + "::Execute( char *data, int dlen )\n" | + " /* Prime these to one back to simulate entering the \n" | + " * machine on a transition. */ \n" | + " register char *p = data-1;\n" | + " register int len = dlen+1;\n" | + " /* Switch statment to enter the machine. */\n" | + " switch ( curState ) {\n" | + "::Finish\n" | + " * Indicate to the fsm that the input is done. 
Does cleanup tasks.\n" | + "void \n" | + "::Finish( )\n" | + " int cs = curState;\n" | + " int accept = 0;\n" | + " this->accept = accept;\n" | + "/*******************************************************\n" | + "::Accept\n" | + " * Did the machine accept?\n" | + "int \n" | + "::Accept( )\n" | + " return accept;\n" | + "#undef alph\n" | + "12FGotoCodeGen\n" | + "13CFGotoCodeGen\n" | + "14CCFGotoCodeGen\n" | + "11GotoCodeGen\n" | + "10FsmCodeGen\n" | + "bad_alloc\n" | + "fsm->\n" | + "/* Only non-static data: current state. */\n" | + "struct \n" | + "Struct\n" | + " int curState;\n" | + " int accept;\n" | + "typedef struct \n" | + "Struct \n" | + "/* Init the fsm. */\n" | + "void \n" | + "Init( \n" | + " *fsm );\n" | + "/* Execute some chunk of data. */\n" | + "void \n" | + "Execute( \n" | + " *fsm, char *data, int dlen );\n" | + "/* Indicate to the fsm tha there is no more data. */\n" | + "void \n" | + "Finish( \n" | + " *fsm );\n" | + "/* Did the machine accept? */\n" | + "int \n" | + "Accept( \n" | + " *fsm );\n" | + "/* The start state. */\n" | + "static int \n" | + "_startState = \n" | + "/****************************************\n" | + "Init\n" | + "void \n" | + " *fsm )\n" | + " fsm->curState = \n" | + "_startState;\n" | + " fsm->accept = 0;\n" | + "#define alph unsigned char\n" | + "/**********************************************************************\n" | + " * Execute the fsm on some chunk of data. \n" | + "void \n" | + " *fsm, char *data, int dlen )\n" | + " /* Prime these to one back to simulate entering the \n" | + " * machine on a transition. */ \n" | + " register char *p = data-1;\n" | + " register int len = dlen+1;\n" | + " /* Switch statment to enter the machine. */\n" | + " switch ( \n" | + "curState ) {\n" | + "/**********************************************************************\n" | + "Finish\n" | + " * Indicate to the fsm that the input is done. 
Does cleanup tasks.\n" | + "void \n" | + " *fsm )\n" | + " int cs = fsm->curState;\n" | + " int accept = 0;\n" | + " fsm->accept = accept;\n" | + "/*******************************************************\n" | + "Accept\n" | + " * Did the machine accept?\n" | + "int \n" | + " *fsm )\n" | + " return fsm->accept;\n" | + "#undef alph\n" | + "/* Only non-static data: current state. */\n" | + "class \n" | + "public:\n" | + " /* Init the fsm. */\n" | + " void Init( );\n" | + " /* Execute some chunk of data. */\n" | + " void Execute( char *data, int dlen );\n" | + " /* Indicate to the fsm tha there is no more data. */\n" | + " void Finish( );\n" | + " /* Did the machine accept? */\n" | + " int Accept( );\n" | + " int curState;\n" | + " int accept;\n" | + " /* The start state. */\n" | + " static int startState;\n" | + "/* The start state. */\n" | + "int \n" | + "::startState = \n" | + " Init();\n" | + "/****************************************\n" | + "::Init\n" | + "void \n" | + "::Init( )\n" | + " curState = startState;\n" | + " accept = 0;\n" | + "#define alph unsigned char\n" | + "/**********************************************************************\n" | + " * Execute the fsm on some chunk of data. \n" | + "void \n" | + "::Execute( char *data, int dlen )\n" | + " /* Prime these to one back to simulate entering the \n" | + " * machine on a transition. */ \n" | + " register char *p = data-1;\n" | + " register int len = dlen+1;\n" | + " /* Switch statment to enter the machine. */\n" | + " switch ( curState ) {\n" | + "::Finish\n" | + " * Indicate to the fsm that the input is done. 
Does cleanup tasks.\n" | + "void \n" | + "::Finish( )\n" | + " int cs = curState;\n" | + " int accept = 0;\n" | + " this->accept = accept;\n" | + "/*******************************************************\n" | + "::Accept\n" | + " * Did the machine accept?\n" | + "int \n" | + "::Accept( )\n" | + " return accept;\n" | + "#undef alph\n" | + "13IpGotoCodeGen\n" | + "14CIpGotoCodeGen\n" | + "15CCIpGotoCodeGen\n" | + "11GotoCodeGen\n" | + "10FsmCodeGen\n"; +}%% + +%% write data; +struct strs the_fsm; + +void test( char *buf ) +{ + struct strs *fsm = &the_fsm; + char *p = buf; + char *pe = buf + strlen( buf ); + + %% write init; + %% write exec; + + %% write eof; + + if ( fsm->cs >= strs_first_final ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + + +int main() +{ + test( "stdin\n" ); + test( "bad_alloc\n" ); + test( "_GLOBAL_OFFSET_TABLE_\n" ); + test( "not in\n" ); + test( + "isatty\n" + "junk on end.\n" + ); + + return 0; +} + +#ifdef _____OUTPUT_____ +ACCEPT +ACCEPT +ACCEPT +FAIL +FAIL +#endif diff --git a/test/testcase.txl b/test/testcase.txl new file mode 100644 index 0000000..cd02bb8 --- /dev/null +++ b/test/testcase.txl @@ -0,0 +1,177 @@ +comments + '# +end comments + +tokens + union "\[[(\\\c)#\]]*\]" +end tokens + +compounds + '%% '%%{ '}%% '== ':= '-> '<> '>= '<= '=> + '|* '*| + '>! '<! '$! '%! '@! '<>! 
+ '>/ '</ '$/ '%/ '@/ '<>/ +end compounds + +keys + 'int 'bool 'true 'false 'char 'ptr + 'if 'else 'printi 'prints + 'fc 'fpc 'fbreak 'fgoto 'fcall 'fret 'fhold 'fexec +end keys + +define lang_indep + [al_statements] + '%% [NL] + [al_statements] + [ragel_def] +end define + +define ragel_def + '%%{ [NL] [IN] + [ragel_program] + [EX] '}%% [NL] +end define + +define ragel_program + [repeat statement] +end define + +define statement + [machine_stmt] + | [action_stmt] + | [cond_action_stmt] + | [machine_def] + | [machine_inst] +end define + +define machine_stmt + 'machine [id] '; [NL] +end define + +define action_stmt + 'action [id] [al_host_block] +end define + +define cond_action_stmt + 'action [id] '{ [al_expr] '} [NL] +end define + +define al_statements + [repeat action_lang_stmt] +end define + +define action_lang_stmt + [al_ragel_stmt] + | [al_variable_decl] + | [al_expr_stmt] + | [al_if_stmt] + | [al_print_stmt] + | '{ [al_statements] '} +end define + +define al_print_stmt + [print_cmd] [al_expr] '; [NL] +end define + +define print_cmd + 'printi | 'prints +end define + +define al_variable_decl + [al_type_decl] [id] [opt union] '; [NL] +end define + +define al_array_decl + '[ [number] '] +end define + +define al_type_decl + 'int | 'bool | 'char | 'ptr +end define + +define al_expr_stmt + [al_expr] '; [NL] +end define + +define al_expr + [al_term] [repeat al_expr_extend] +end define + +define al_expr_extend + [al_expr_op] [al_term] +end define + +define al_expr_op + '= | '+ | '- | '* | '/ | '== | '<= | '>= +end define + +define al_term + [id] + | [opt al_sign] [number] + | [stringlit] + | [charlit] + | 'fc + | 'true + | 'false + | '( [al_expr] ') +end define + +define al_sign + '- | '+ +end define + +define al_if_stmt + 'if '( [al_expr] ') [NL] [IN] + [action_lang_stmt] [EX] + [opt al_else] +end define + +define al_else + 'else [NL] [IN] + [action_lang_stmt] [EX] +end define + +define al_ragel_stmt + 'fbreak '; [NL] + | 'fhold '; [NL] + | 'fexec [repeat al_expr] '; 
[NL] + | 'fnext [id] '; [NL] + | 'fgoto [id] '; [NL] + | 'fcall [id] '; [NL] + | 'fnext '* [repeat al_expr] '; [NL] + | 'fgoto '* [repeat al_expr] '; [NL] + | 'fcall '* [repeat al_expr] '; [NL] + | 'fret '; [NL] +end define + +define machine_def + [id] '= [machine_expr] '; [NL] +end define + +define machine_inst + [id] ':= [machine_expr] '; [NL] +end define + +define machine_expr + [repeat machine_expr_item] +end define + +define scanner_item + [repeat machine_expr_item] '; [NL] +end define + +define machine_expr_item + [action_embed] [al_host_block] + | '|* [repeat scanner_item] '*| + | [not ';] [not '*|] [token] +end define + +define al_host_block + '{ [NL] [IN] [al_statements] [EX] '} [NL] +end define + +define action_embed + '> | '$ | '@ | '% | + '$! | '=> +end define + diff --git a/test/tokstart1.rl b/test/tokstart1.rl new file mode 100644 index 0000000..b6df225 --- /dev/null +++ b/test/tokstart1.rl @@ -0,0 +1,241 @@ +/* + * @LANG: c++ + */ + +#include <iostream> +#include <string.h> +using namespace std; + +extern char buf[]; + +struct Scanner +{ + int cs, act; + char *tokstart, *tokend; + + // Initialize the machine. Invokes any init statement blocks. Returns 0 + // if the machine begins in a non-accepting state and 1 if the machine + // begins in an accepting state. + void init( ); + + // Execute the machine on a block of data. Returns -1 if after processing + // the data, the machine is in the error state and can never accept, 0 if + // the machine is in a non-accepting state and 1 if the machine is in an + // accepting state. + int execute( char *data, int len ); + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. 
+ int finish( ); +}; + +%%{ + machine Scanner; + + action to_act { + cout << "to: fc = "; + if ( fc == '\'' ) + cout << (int)fc; + else + cout << fc; + cout << " tokstart = " << ( tokstart == 0 ? -1 : tokstart-buf ) << endl; + } + action from_act { + cout << "from: fc = "; + if ( fc == '\'' ) + cout << (int)fc; + else + cout << fc; + cout << " tokstart = " << ( tokstart == 0 ? -1 : tokstart-buf ) << endl; + } + + c_comm := ( any* $0 '*/' @1 @{ fgoto main; } ) $~to_act $*from_act; + cxx_comm := ( any* $0 '\n' @1 @{ fgoto main; } ) $~to_act $*from_act; + + main := |* + + # Single and double literals. + ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) $~ to_act $* from_act; + ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) $~ to_act $* from_act; + + # Identifiers + ( [a-zA-Z_] [a-zA-Z0-9_]* ) $~ to_act $* from_act; + + # Floating literals. + fract_const = digit* '.' digit+ | digit+ '.'; + exponent = [eE] [+\-]? digit+; + float_suffix = [flFL]; + + ( fract_const exponent? float_suffix? | + digit+ exponent float_suffix? ) $~ to_act $* from_act; + + # Integer decimal. Leading part buffered by float. + ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) $~ to_act $* from_act + + # Integer octal. Leading part buffered by float. + ( '0' [0-9]+ [ulUL]{0,2} ) $~ to_act $* from_act; + + # Integer hex. Leading 0 buffered by float. + ( '0x' [0-9a-fA-F]+ [ulUL]{0,2} ) $~ to_act $* from_act; + + # Three char compounds, first item already buffered. */ + ( '...' ) $~ to_act $* from_act; + + # Single char symbols. + ( punct - [_"'] ) $~ to_act $* from_act; + + # Comments and whitespace. 
+ ( '/*' ) $~ to_act $* from_act { fgoto c_comm; }; + ( '//' ) $~ to_act $* from_act { fgoto cxx_comm; }; + + ( any - 33..126 )+ $~ to_act $* from_act; + + *|; +}%% + +%% write data; + +void Scanner::init( ) +{ + %% write init; +} + +int Scanner::execute( char *data, int len ) +{ + char *p = data; + char *pe = data + len; + + %% write exec; + + int have = 0; + if ( tokstart != 0 ) { + have = pe - tokstart; + memmove( data, tokstart, have ); + } + return have; +} + +int Scanner::finish( ) +{ + %% write eof; + if ( cs == Scanner_error ) + return -1; + if ( cs >= Scanner_first_final ) + return 1; + return 0; +} + +void test( ) +{ + int len = strlen( buf ); + Scanner scanner; + + scanner.init(); + scanner.execute( buf, len ); + if ( scanner.cs == Scanner_error ) { + /* Machine failed before finding a token. */ + cout << "PARSE ERROR" << endl; + } + scanner.finish(); +} + +char buf[4096]; + +int main() +{ + strcpy( buf, + "a b 0.98 /*\n" + "9 */'\\''//hi\n" + "there\n" + ); + test(); + return 0; +} + +#ifdef _____OUTPUT_____ +from: fc = a tokstart = 0 +to: fc = a tokstart = 0 +from: fc = tokstart = 0 +to: fc = a tokstart = -1 +from: fc = tokstart = 1 +to: fc = tokstart = 1 +from: fc = b tokstart = 1 +to: fc = tokstart = -1 +from: fc = b tokstart = 2 +to: fc = b tokstart = 2 +from: fc = tokstart = 2 +to: fc = b tokstart = -1 +from: fc = tokstart = 3 +to: fc = tokstart = 3 +from: fc = 0 tokstart = 3 +to: fc = tokstart = -1 +from: fc = 0 tokstart = 4 +to: fc = 0 tokstart = 4 +from: fc = . tokstart = 4 +to: fc = . 
tokstart = 4 +from: fc = 9 tokstart = 4 +to: fc = 9 tokstart = 4 +from: fc = 8 tokstart = 4 +to: fc = 8 tokstart = 4 +from: fc = tokstart = 4 +to: fc = 8 tokstart = -1 +from: fc = tokstart = 8 +to: fc = tokstart = 8 +from: fc = / tokstart = 8 +to: fc = tokstart = -1 +from: fc = / tokstart = 9 +to: fc = / tokstart = 9 +from: fc = * tokstart = 9 +to: fc = * tokstart = -1 +from: fc = + tokstart = -1 +to: fc = + tokstart = -1 +from: fc = 9 tokstart = -1 +to: fc = 9 tokstart = -1 +from: fc = tokstart = -1 +to: fc = tokstart = -1 +from: fc = * tokstart = -1 +to: fc = * tokstart = -1 +from: fc = / tokstart = -1 +to: fc = / tokstart = -1 +from: fc = 39 tokstart = 16 +to: fc = 39 tokstart = 16 +from: fc = \ tokstart = 16 +to: fc = \ tokstart = 16 +from: fc = 39 tokstart = 16 +to: fc = 39 tokstart = 16 +from: fc = 39 tokstart = 16 +to: fc = 39 tokstart = -1 +from: fc = / tokstart = 20 +to: fc = / tokstart = 20 +from: fc = / tokstart = 20 +to: fc = / tokstart = -1 +from: fc = h tokstart = -1 +to: fc = h tokstart = -1 +from: fc = i tokstart = -1 +to: fc = i tokstart = -1 +from: fc = + tokstart = -1 +to: fc = + tokstart = -1 +from: fc = t tokstart = 25 +to: fc = t tokstart = 25 +from: fc = h tokstart = 25 +to: fc = h tokstart = 25 +from: fc = e tokstart = 25 +to: fc = e tokstart = 25 +from: fc = r tokstart = 25 +to: fc = r tokstart = 25 +from: fc = e tokstart = 25 +to: fc = e tokstart = 25 +from: fc = + tokstart = 25 +to: fc = e tokstart = -1 +from: fc = + tokstart = 30 +to: fc = + tokstart = 30 +#endif diff --git a/test/union.rl b/test/union.rl new file mode 100644 index 0000000..6ad7e9a --- /dev/null +++ b/test/union.rl @@ -0,0 +1,189 @@ +/* + * @LANG: c++ + * Show off concurrent abilities. + */ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> + +using namespace std; + +#define BUFSIZE 2048 + +struct Concurrent +{ + int cur_char; + int start_word; + int start_comment; + int start_literal; + + int cs; + + // Initialize the machine. 
Invokes any init statement blocks. Returns 0 + // if the machine begins in a non-accepting state and 1 if the machine + // begins in an accepting state. + void init( ); + + // Execute the machine on a block of data. Returns -1 if after processing + // the data, the machine is in the error state and can never accept, 0 if + // the machine is in a non-accepting state and 1 if the machine is in an + // accepting state. + void execute( const char *data, int len ); + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ); +}; + +%%{ + machine Concurrent; + + action next_char { + cur_char += 1; + } + + action start_word { + start_word = cur_char; + } + action end_word { + cout << "word: " << start_word << + " " << cur_char-1 << endl; + } + + action start_comment { + start_comment = cur_char; + } + action end_comment { + cout << "comment: " << start_comment << + " " << cur_char-1 << endl; + } + + action start_literal { + start_literal = cur_char; + } + action end_literal { + cout << "literal: " << start_literal << + " " << cur_char-1 << endl; + } + + # Count characters. + chars = ( any @next_char )*; + + # Words are non-whitespace. + word = ( any-space )+ >start_word %end_word; + words = ( ( word | space ) $1 %0 )*; + + # Finds C style comments. + comment = ( '/*' any* $0 '*/'@1 ) >start_comment %end_comment; + comments = ( ( comment | any ) $1 %0 )*; + + # Finds single quoted strings. + literalChar = ( any - ['\\] ) | ( '\\' . 
any ); + literal = ('\'' literalChar* '\'' ) >start_literal %end_literal; + literals = ( ( literal | (any-'\'') ) $1 %0 )*; + + main := chars | words | comments | literals; +}%% + +%% write data; + +void Concurrent::init( ) +{ + cur_char = 0; + %% write init; +} + +void Concurrent::execute( const char *data, int len ) +{ + const char *p = data; + const char *pe = data + len; + + %% write exec; +} + +int Concurrent::finish( ) +{ + %% write eof; + if ( cs == Concurrent_error ) + return -1; + if ( cs >= Concurrent_first_final ) + return 1; + return 0; +} + +void test( char *buf ) +{ + Concurrent concurrent; + concurrent.init(); + concurrent.execute( buf, strlen(buf) ); + if ( concurrent.finish() > 0 ) + cout << "ACCEPT" << endl; + else + cout << "FAIL" << endl; +} + +int main() +{ + test( + "/* in a comment,\n" + " * ' and now in a literal string\n" + " */ \n" + " \n" + "the comment has now ended but the literal string lives on\n" + "\n" + "' comment closed\n" ); + test( "/* * ' \\' */ \\' '\n" ); + test( "/**/'\\''/*/*/\n" ); + return 0; +} + +#ifdef _____OUTPUT_____ +word: 1 2 +word: 4 5 +word: 7 7 +word: 9 16 +word: 19 19 +word: 21 21 +word: 23 25 +word: 27 29 +word: 31 32 +word: 34 34 +word: 36 42 +word: 44 49 +word: 52 53 +comment: 1 53 +word: 58 60 +word: 62 68 +word: 70 72 +word: 74 76 +word: 78 82 +word: 84 86 +word: 88 90 +word: 92 98 +word: 100 105 +word: 107 111 +word: 113 114 +word: 117 117 +literal: 21 117 +word: 119 125 +word: 127 132 +ACCEPT +word: 1 2 +word: 4 4 +word: 6 6 +word: 8 9 +word: 11 12 +comment: 1 12 +word: 14 15 +word: 17 17 +literal: 6 17 +ACCEPT +comment: 1 4 +literal: 5 8 +word: 1 13 +comment: 9 13 +ACCEPT +#endif diff --git a/test/xml.rl b/test/xml.rl new file mode 100644 index 0000000..ca13f43 --- /dev/null +++ b/test/xml.rl @@ -0,0 +1,108 @@ +/* + * XML parser based on the XML 1.0 BNF from: + * http://www.jelks.nu/XML/xmlebnf.html + * + * @LANG: c++ + * @ALLOW_MINFLAGS: -l -e + * @ALLOW_GENFLAGS: -T0 -T1 + */ + +#include <iostream> 
+#include <stdlib.h> +#include <stdio.h> + +using namespace std; + +#define BUFSIZE 2048 + +struct XML +{ + int cur_char; + int start_word; + int start_comment; + int start_literal; + + int cs, top, stack[1024]; + + int init( ); + int execute( const unsigned short *data, int len ); + int finish( ); +}; + +%%{ + machine XML; + alphtype unsigned short; + + action next_char { + cur_char += 1; + } + + action start_word { + start_word = cur_char; + } + action end_word { + cout << "word: " << start_word << + " " << cur_char-1 << endl; + } + + Extender = 0x00B7 | 0x02D0 | 0x02D1 | 0x0387 | 0x0640 | 0x0E46 | 0x0EC6 | 0x3005 | (0x3031..0x3035) | (0x309D..0x309E) | (0x30FC..0x30FE); + + Digit = (0x0030..0x0039) | (0x0660..0x0669) | (0x06F0..0x06F9) | (0x0966..0x096F) | (0x09E6..0x09EF) | (0x0A66..0x0A6F) | (0x0AE6..0x0AEF) | (0x0B66..0x0B6F) | (0x0BE7..0x0BEF) | (0x0C66..0x0C6F) | (0x0CE6..0x0CEF) | (0x0D66..0x0D6F) | (0x0E50..0x0E59) | (0x0ED0..0x0ED9) | (0x0F20..0x0F29); + + CombiningChar = (0x0300..0x0345) | (0x0360..0x0361) | (0x0483..0x0486) | (0x0591..0x05A1) | (0x05A3..0x05B9) | (0x05BB..0x05BD) | 0x05BF | (0x05C1..0x05C2) | 0x05C4 | (0x064B..0x0652) | 0x0670 | (0x06D6..0x06DC) | (0x06DD..0x06DF) | (0x06E0..0x06E4) | (0x06E7..0x06E8) | (0x06EA..0x06ED) | (0x0901..0x0903) | 0x093C | (0x093E..0x094C) | 0x094D | (0x0951..0x0954) | (0x0962..0x0963) | (0x0981..0x0983) | 0x09BC | 0x09BE | 0x09BF | (0x09C0..0x09C4) | (0x09C7..0x09C8) | (0x09CB..0x09CD) | 0x09D7 | (0x09E2..0x09E3) | 0x0A02 | 0x0A3C | 0x0A3E | 0x0A3F | (0x0A40..0x0A42) | (0x0A47..0x0A48) | (0x0A4B..0x0A4D) | (0x0A70..0x0A71) | (0x0A81..0x0A83) | 0x0ABC | (0x0ABE..0x0AC5) | (0x0AC7..0x0AC9) | (0x0ACB..0x0ACD) | (0x0B01..0x0B03) | 0x0B3C | (0x0B3E..0x0B43) | (0x0B47..0x0B48) | (0x0B4B..0x0B4D) | (0x0B56..0x0B57) | (0x0B82..0x0B83) | (0x0BBE..0x0BC2) | (0x0BC6..0x0BC8) | (0x0BCA..0x0BCD) | 0x0BD7 | (0x0C01..0x0C03) | (0x0C3E..0x0C44) | (0x0C46..0x0C48) | (0x0C4A..0x0C4D) | (0x0C55..0x0C56) | (0x0C82..0x0C83) | 
(0x0CBE..0x0CC4) | (0x0CC6..0x0CC8) | (0x0CCA..0x0CCD) | (0x0CD5..0x0CD6) | (0x0D02..0x0D03) | (0x0D3E..0x0D43) | (0x0D46..0x0D48) | (0x0D4A..0x0D4D) | 0x0D57 | 0x0E31 | (0x0E34..0x0E3A) | (0x0E47..0x0E4E) | 0x0EB1 | (0x0EB4..0x0EB9) | (0x0EBB..0x0EBC) | (0x0EC8..0x0ECD) | (0x0F18..0x0F19) | 0x0F35 | 0x0F37 | 0x0F39 | 0x0F3E | 0x0F3F | (0x0F71..0x0F84) | (0x0F86..0x0F8B) | (0x0F90..0x0F95) | 0x0F97 | (0x0F99..0x0FAD) | (0x0FB1..0x0FB7) | 0x0FB9 | (0x20D0..0x20DC) | 0x20E1 | (0x302A..0x302F) | 0x3099 | 0x309A; + + Ideographic = (0x4E00..0x9FA5) | 0x3007 | (0x3021..0x3029); + + BaseChar = (0x0041..0x005A) | (0x0061..0x007A) | (0x00C0..0x00D6) | (0x00D8..0x00F6) | (0x00F8..0x00FF) | (0x0100..0x0131) | (0x0134..0x013E) | (0x0141..0x0148) | (0x014A..0x017E) | (0x0180..0x01C3) | (0x01CD..0x01F0) | (0x01F4..0x01F5) | (0x01FA..0x0217) | (0x0250..0x02A8) | (0x02BB..0x02C1) | 0x0386 | (0x0388..0x038A) | 0x038C | (0x038E..0x03A1) | (0x03A3..0x03CE) | (0x03D0..0x03D6) | 0x03DA | 0x03DC | 0x03DE | 0x03E0 | (0x03E2..0x03F3) | (0x0401..0x040C) | (0x040E..0x044F) | (0x0451..0x045C) | (0x045E..0x0481) | (0x0490..0x04C4) | (0x04C7..0x04C8) | (0x04CB..0x04CC) | (0x04D0..0x04EB) | (0x04EE..0x04F5) | (0x04F8..0x04F9) | (0x0531..0x0556) | 0x0559 | (0x0561..0x0586) | (0x05D0..0x05EA) | (0x05F0..0x05F2) | (0x0621..0x063A) | (0x0641..0x064A) | (0x0671..0x06B7) | (0x06BA..0x06BE) | (0x06C0..0x06CE) | (0x06D0..0x06D3) | 0x06D5 | (0x06E5..0x06E6) | (0x0905..0x0939) | 0x093D | (0x0958..0x0961) | (0x0985..0x098C) | (0x098F..0x0990) | (0x0993..0x09A8) | (0x09AA..0x09B0) | 0x09B2 | (0x09B6..0x09B9) | (0x09DC..0x09DD) | (0x09DF..0x09E1) | (0x09F0..0x09F1) | (0x0A05..0x0A0A) | (0x0A0F..0x0A10) | (0x0A13..0x0A28) | (0x0A2A..0x0A30) | (0x0A32..0x0A33) | (0x0A35..0x0A36) | (0x0A38..0x0A39) | (0x0A59..0x0A5C) | 0x0A5E | (0x0A72..0x0A74) | (0x0A85..0x0A8B) | 0x0A8D | (0x0A8F..0x0A91) | (0x0A93..0x0AA8) | (0x0AAA..0x0AB0) | (0x0AB2..0x0AB3) | (0x0AB5..0x0AB9) | 0x0ABD | 0x0AE0 | (0x0B05..0x0B0C) | 
(0x0B0F..0x0B10) | (0x0B13..0x0B28) | (0x0B2A..0x0B30) | (0x0B32..0x0B33) | (0x0B36..0x0B39) | 0x0B3D | (0x0B5C..0x0B5D) | (0x0B5F..0x0B61) | (0x0B85..0x0B8A) | (0x0B8E..0x0B90) | (0x0B92..0x0B95) | (0x0B99..0x0B9A) | 0x0B9C | (0x0B9E..0x0B9F) | (0x0BA3..0x0BA4) | (0x0BA8..0x0BAA) | (0x0BAE..0x0BB5) | (0x0BB7..0x0BB9) | (0x0C05..0x0C0C) | (0x0C0E..0x0C10) | (0x0C12..0x0C28) | (0x0C2A..0x0C33) | (0x0C35..0x0C39) | (0x0C60..0x0C61) | (0x0C85..0x0C8C) | (0x0C8E..0x0C90) | (0x0C92..0x0CA8) | (0x0CAA..0x0CB3) | (0x0CB5..0x0CB9) | 0x0CDE | (0x0CE0..0x0CE1) | (0x0D05..0x0D0C) | (0x0D0E..0x0D10) | (0x0D12..0x0D28) | (0x0D2A..0x0D39) | (0x0D60..0x0D61) | (0x0E01..0x0E2E) | 0x0E30 | (0x0E32..0x0E33) | (0x0E40..0x0E45) | (0x0E81..0x0E82) | 0x0E84 | (0x0E87..0x0E88) | 0x0E8A | 0x0E8D | (0x0E94..0x0E97) | (0x0E99..0x0E9F) | (0x0EA1..0x0EA3) | 0x0EA5 | 0x0EA7 | (0x0EAA..0x0EAB) | (0x0EAD..0x0EAE) | 0x0EB0 | (0x0EB2..0x0EB3) | 0x0EBD | (0x0EC0..0x0EC4) | (0x0F40..0x0F47) | (0x0F49..0x0F69) | (0x10A0..0x10C5) | (0x10D0..0x10F6) | 0x1100 | (0x1102..0x1103) | (0x1105..0x1107) | 0x1109 | (0x110B..0x110C) | (0x110E..0x1112) | 0x113C | 0x113E | 0x1140 | 0x114C | 0x114E | 0x1150 | (0x1154..0x1155) | 0x1159 | (0x115F..0x1161) | 0x1163 | 0x1165 | 0x1167 | 0x1169 | (0x116D..0x116E) | (0x1172..0x1173) | 0x1175 | 0x119E | 0x11A8 | 0x11AB | (0x11AE..0x11AF) | (0x11B7..0x11B8) | 0x11BA | (0x11BC..0x11C2) | 0x11EB | 0x11F0 | 0x11F9 | (0x1E00..0x1E9B) | (0x1EA0..0x1EF9) | (0x1F00..0x1F15) | (0x1F18..0x1F1D) | (0x1F20..0x1F45) | (0x1F48..0x1F4D) | (0x1F50..0x1F57) | 0x1F59 | 0x1F5B | 0x1F5D | (0x1F5F..0x1F7D) | (0x1F80..0x1FB4) | (0x1FB6..0x1FBC) | 0x1FBE | (0x1FC2..0x1FC4) | (0x1FC6..0x1FCC) | (0x1FD0..0x1FD3) | (0x1FD6..0x1FDB) | (0x1FE0..0x1FEC) | (0x1FF2..0x1FF4) | (0x1FF6..0x1FFC) | 0x2126 | (0x212A..0x212B) | 0x212E | (0x2180..0x2182) | (0x3041..0x3094) | (0x30A1..0x30FA) | (0x3105..0x312C) | (0xAC00..0xD7A3); + + # Full Unicode 3.1 requires: Char = 0x9 | 0xA | 0xD | (0x20..0xD7FF) | 
(0xE000..0xFFFD) | (0x10000..0x10FFFF); + + Char = 0x9 | 0xA | 0xD | (0x20..0xD7FF) | (0xE000..0xFFFD); + + Letter = BaseChar | Ideographic; + + NameChar = Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender; + + include CommonXml "xmlcommon.rl"; + +}%% + + %% write data; + + int XML::init( ) + { + %% write init; + cur_char = 0; + return 1; + } + + int XML::execute( const unsigned short *data, int len ) + { + const unsigned short *p = data; + const unsigned short *pe = data + len; + + %% write exec; + + if ( cs == XML_error ) + return -1; + if ( cs >= XML_first_final ) + return 1; + return 0; + } + + int XML::finish( ) + { + %% write eof; + if ( cs == XML_error ) + return -1; + if ( cs >= XML_first_final ) + return 1; + return 0; + } + + int main() + { + return 0; + } +/* _____OUTPUT_____ +_____OUTPUT_____ */ diff --git a/test/xmlcommon.rl b/test/xmlcommon.rl new file mode 100644 index 0000000..e7a855e --- /dev/null +++ b/test/xmlcommon.rl @@ -0,0 +1,205 @@ +/* + * This file is included by xml.rl + * + * @IGNORE: yes + */ + +%%{ + + # + # Common XML grammar rules based on the XML 1.0 BNF from: + # http://www.jelks.nu/XML/xmlebnf.html + # + + machine CommonXml; + + S = (0x20 | 0x9 | 0xD | 0xA)+; + + # WAS PubidChar = 0x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]; + PubidChar = 0x20 | 0xD | 0xA | [a-zA-Z0-9] | [\-'()+,./:=?;!*#@$_%]; + + PubidLiteral = '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"; + + Name = (Letter | '_' | ':') (NameChar)*; + + Comment = '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'; + + # Used strong subtraction operator, and replaced * with +. Ragel complained since using + # * results in a machine that accepts 0 length strings, and later it's only used in an + # optional construct anyway. + # + CharData_Old = [^<&]* - ([^<&]* ']]>' [^<&]*); + CharData = [^<&]+ -- ']]>'; + + SystemLiteral = ('"' [^"]* '"') | ("'" [^']* "'"); + + Eq = S? 
'=' S?; + + VersionNum = ([a-zA-Z0-9_.:] | '-')+; + + # WAS S 'version' Eq (' VersionNum ' | " VersionNum ") - fixed quotes + VersionInfo = S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"'); + + ExternalID = 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral; + + PublicID = 'PUBLIC' S PubidLiteral; + + NotationDecl = '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'; + + EncName = [A-Za-z] ([A-Za-z0-9._] | '-')*; + + EncodingDecl = S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ); + + # UNUSED TextDecl = '<?xml' VersionInfo? EncodingDecl S? '?>'; + + NDataDecl = S 'NDATA' S Name; + + PEReference = '%' Name ';'; + + EntityRef = '&' Name ';'; + + CharRef = '&#' [0-9]+ ';' | '&0x' [0-9a-fA-F]+ ';'; + + Reference = EntityRef | CharRef; + + EntityValue = '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"; + + PEDef = EntityValue | ExternalID; + + EntityDef = EntityValue | (ExternalID NDataDecl?); + + PEDecl = '<!ENTITY' S '%' S Name S PEDef S? '>'; + + GEDecl = '<!ENTITY' S Name S EntityDef S? '>'; + + EntityDecl = GEDecl | PEDecl; + + Mixed = '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'; + + # WAS cp = (Name | choice | seq) ('?' | '*' | '+')?; + + # WAS seq = '(' S? cp ( S? ',' S? cp )* S? ')'; + + # WAS choice = '(' S? cp ( S? '|' S? cp )* S? ')'; + + # WAS children = (choice | seq) ('?' | '*' | '+')?; + + # TODO put validation for this in and make it clearer + alt = '?' | '*' | '+'; + children = '(' S? + ( ( Name alt? ) | + '(' | + ( ')' alt? ) | + [,|] | + S ) + ')' alt?; + + contentspec = 'EMPTY' | 'ANY' | Mixed | children; + + elementdecl = '<!ELEMENT' S Name S contentspec S? '>'; + + AttValue = '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"; + + Attribute = Name Eq AttValue; + + Nmtoken = (NameChar)+; + + # UNUSED Nmtokens = Nmtoken (S Nmtoken)*; + + Enumeration = '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'; + + NotationType = 'NOTATION' S '(' S? Name (S? 
'|' S? Name)* S? ')'; + + EnumeratedType = NotationType | Enumeration; + + TokenizedType = 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'; + + StringType = 'CDATA'; + + AttType = StringType | TokenizedType | EnumeratedType; + + DefaultDecl = '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue); + + AttDef = S Name S AttType S DefaultDecl; + + AttlistDecl = '<!ATTLIST' S Name AttDef* S? '>'; + + EmptyElemTag = '<' Name (S Attribute)* S? '/>'; + + ETag = '</' Name S? '>'; + + PITarget_Old = Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')); + PITarget = Name -- "xml"i; + + PI = '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'; + + markupdecl = elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment; + + doctypedecl = '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'; + + # TODO extSubsetDecl = ( markupdecl | conditionalSect | PEReference | S )*; + # UNUSED extSubsetDecl = ( markupdecl | PEReference | S )*; + + # UNUSED extSubset = TextDecl? extSubsetDecl; + + # UNUSED Ignore = Char* - (Char* ('<![' | ']]>') Char*); + + # TODO: ignoreSectContents = Ignore ('<![' ignoreSectContents ']]>' Ignore)*; + # UNUSED ignoreSectContents = Ignore ('<![' ']]>' Ignore)*; + + # UNUSED ignoreSect = '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'; + + # UNUSED includeSect = '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'; + + # UNUSED conditionalSect = includeSect | ignoreSect; + + STag = '<' Name (S Attribute)* S? 
'>'; + + CDStart = '<![CDATA['; + + CDEnd = ']]>'; + + # WAS CData = (Char* - (Char* ']]>' Char*)); + CData = (Char* -- CDEnd); + + CDSect = CDStart CData CDEnd; + + # UNUSED Subcode = ([a-z] | [A-Z])+; + + # UNUSED UserCode = ('x' | 'X') '-' ([a-z] | [A-Z])+; + + # UNUSED IanaCode = ('i' | 'I') '-' ([a-z] | [A-Z])+; + + # UNUSED ISO639Code = ([a-z] | [A-Z]) ([a-z] | [A-Z]); + + # UNUSED Langcode = ISO639Code | IanaCode | UserCode; + + # UNUSED LanguageID = Langcode ('-' Subcode)*; + + SDDecl = S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')); + + # UNUSED extPE = TextDecl? extSubsetDecl; + + Misc = Comment | PI | S; + + XMLDecl = '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'; + + prolog = XMLDecl? Misc* (doctypedecl Misc*)?; + + # UNUSED Names = Name (S Name)*; + + # Added fcall - TODO check logic is correct + # UNUSED extParsedEnt = TextDecl? @{fcall content;}; + + # TODO tag stack validation + + # WAS element = EmptyElemTag | STag content ETag + # WAS content = (element | CharData | Reference | CDSect | PI | Comment)*; + content = (EmptyElemTag | STag | ETag | CharData | Reference | CDSect | PI | Comment)*; + + # WAS document = prolog element Misc*; + document = prolog ( EmptyElemTag | ( STag content ETag ) ) Misc*; + + main := document; + +}%% diff --git a/version.mk b/version.mk new file mode 100644 index 0000000..0193c3d --- /dev/null +++ b/version.mk @@ -0,0 +1,2 @@ +VERSION = 5.16 +PUBDATE = November 2006 |