summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--COPYING340
-rw-r--r--CREDITS25
-rw-r--r--ChangeLog1213
-rw-r--r--Makefile.in56
-rw-r--r--README54
-rw-r--r--TODO48
-rw-r--r--aapl/COPYING502
-rw-r--r--aapl/README6
-rw-r--r--aapl/avlbasic.h65
-rw-r--r--aapl/avlcommon.h1622
-rw-r--r--aapl/avlibasic.h67
-rw-r--r--aapl/avlikeyless.h64
-rw-r--r--aapl/avlimap.h77
-rw-r--r--aapl/avlimel.h79
-rw-r--r--aapl/avlimelkey.h76
-rw-r--r--aapl/avliset.h75
-rw-r--r--aapl/avlitree.h78
-rw-r--r--aapl/avlkeyless.h58
-rw-r--r--aapl/avlmap.h74
-rw-r--r--aapl/avlmel.h74
-rw-r--r--aapl/avlmelkey.h71
-rw-r--r--aapl/avlset.h70
-rw-r--r--aapl/avltree.h73
-rw-r--r--aapl/bstcommon.h814
-rw-r--r--aapl/bstmap.h113
-rw-r--r--aapl/bstset.h86
-rw-r--r--aapl/bsttable.h84
-rw-r--r--aapl/bubblesort.h94
-rw-r--r--aapl/compare.h260
-rw-r--r--aapl/dlcommon.h790
-rw-r--r--aapl/dlist.h64
-rw-r--r--aapl/dlistmel.h71
-rw-r--r--aapl/dlistval.h71
-rw-r--r--aapl/insertsort.h94
-rw-r--r--aapl/mergesort.h140
-rw-r--r--aapl/quicksort.h185
-rw-r--r--aapl/resize.h344
-rw-r--r--aapl/sbstmap.h121
-rw-r--r--aapl/sbstset.h94
-rw-r--r--aapl/sbsttable.h93
-rw-r--r--aapl/svector.h1426
-rw-r--r--aapl/table.h252
-rw-r--r--aapl/vector.h1202
-rw-r--r--common/Makefile.in71
-rw-r--r--common/buffer.h55
-rw-r--r--common/common.cpp193
-rw-r--r--common/common.h271
-rw-r--r--common/config.h.in33
-rw-r--r--common/pcheck.h49
-rwxr-xr-xconfigure3991
-rw-r--r--configure.in118
-rw-r--r--doc/Makefile.in73
-rw-r--r--doc/RELEASE_NOTES_V286
-rw-r--r--doc/RELEASE_NOTES_V38
-rw-r--r--doc/RELEASE_NOTES_V4361
-rw-r--r--doc/RELEASE_NOTES_V5112
-rw-r--r--doc/bmconcat.fig40
-rw-r--r--doc/bmnull.fig15
-rw-r--r--doc/bmnum.fig20
-rw-r--r--doc/bmor.fig28
-rw-r--r--doc/bmrange.fig20
-rw-r--r--doc/bmregex.fig42
-rw-r--r--doc/docbook.dsl49
-rw-r--r--doc/exaction.fig37
-rw-r--r--doc/exallact.fig25
-rw-r--r--doc/exallpri.fig33
-rw-r--r--doc/exconcat.fig93
-rw-r--r--doc/exdoneact.fig24
-rw-r--r--doc/exdonepri.fig55
-rw-r--r--doc/exfinact.fig29
-rw-r--r--doc/exfinpri.fig55
-rw-r--r--doc/exinter.fig48
-rw-r--r--doc/exnegate.fig31
-rw-r--r--doc/exoption.fig37
-rw-r--r--doc/exor.fig65
-rw-r--r--doc/explus.fig23
-rw-r--r--doc/exstact.fig33
-rw-r--r--doc/exstar.fig32
-rw-r--r--doc/exstpri.fig33
-rw-r--r--doc/exstrongsubtr.fig65
-rw-r--r--doc/exsubtr.fig87
-rw-r--r--doc/opconcat.fig43
-rw-r--r--doc/opor.fig42
-rw-r--r--doc/opstar.fig49
-rw-r--r--doc/ragel-guide.tex2628
-rw-r--r--doc/ragel.1.in561
-rw-r--r--doc/rlcodegen.1.in107
-rw-r--r--doc/stembed.fig72
-rw-r--r--examples/Makefile37
-rw-r--r--examples/README40
-rw-r--r--examples/atoi/Makefile21
-rw-r--r--examples/atoi/atoi.rl60
-rw-r--r--examples/awkemu/Makefile21
-rw-r--r--examples/awkemu/awkemu.rl116
-rwxr-xr-xexamples/awkemu/awkequiv.awk10
-rw-r--r--examples/clang/Makefile21
-rw-r--r--examples/clang/clang.rl150
-rw-r--r--examples/concurrent/Makefile21
-rw-r--r--examples/concurrent/concurrent.rl126
-rw-r--r--examples/cppscan/Makefile41
-rw-r--r--examples/cppscan/cppscan.lex143
-rw-r--r--examples/cppscan/cppscan.rec183
-rw-r--r--examples/cppscan/cppscan.rl207
-rw-r--r--examples/format/Makefile21
-rw-r--r--examples/format/format.rl191
-rw-r--r--examples/gotocallret/Makefile21
-rw-r--r--examples/gotocallret/gotocallret.rl103
-rw-r--r--examples/mailbox/Makefile16
-rw-r--r--examples/mailbox/mailbox.rl206
-rw-r--r--examples/params/Makefile21
-rw-r--r--examples/params/params.rl104
-rw-r--r--examples/pullscan/Makefile23
-rw-r--r--examples/pullscan/pullscan.rl166
-rw-r--r--examples/rlscan/Makefile21
-rw-r--r--examples/rlscan/rlscan.rl298
-rw-r--r--examples/statechart/Makefile21
-rw-r--r--examples/statechart/statechart.rl114
-rw-r--r--examples/uri/uri.rl31
-rw-r--r--ragel.spec55
-rw-r--r--ragel.vim161
-rw-r--r--ragel/Makefile.in85
-rw-r--r--ragel/fsmap.cpp840
-rw-r--r--ragel/fsmattach.cpp425
-rw-r--r--ragel/fsmbase.cpp485
-rw-r--r--ragel/fsmgraph.cpp1399
-rw-r--r--ragel/fsmgraph.h1369
-rw-r--r--ragel/fsmmin.cpp732
-rw-r--r--ragel/fsmstate.cpp463
-rw-r--r--ragel/main.cpp339
-rw-r--r--ragel/parsedata.cpp1432
-rw-r--r--ragel/parsedata.h463
-rw-r--r--ragel/parsetree.cpp2111
-rw-r--r--ragel/parsetree.h761
-rw-r--r--ragel/ragel.h80
-rw-r--r--ragel/rlparse.kh122
-rw-r--r--ragel/rlparse.kl1402
-rw-r--r--ragel/rlparse.y1456
-rw-r--r--ragel/rlscan.lex1212
-rw-r--r--ragel/rlscan.rl907
-rw-r--r--ragel/xmlcodegen.cpp675
-rw-r--r--ragel/xmlcodegen.h135
-rw-r--r--rlcodegen/Makefile.in93
-rw-r--r--rlcodegen/fflatcodegen.cpp364
-rw-r--r--rlcodegen/fflatcodegen.h70
-rw-r--r--rlcodegen/fgotocodegen.cpp276
-rw-r--r--rlcodegen/fgotocodegen.h70
-rw-r--r--rlcodegen/flatcodegen.cpp777
-rw-r--r--rlcodegen/flatcodegen.h103
-rw-r--r--rlcodegen/fsmcodegen.cpp1012
-rw-r--r--rlcodegen/fsmcodegen.h297
-rw-r--r--rlcodegen/ftabcodegen.cpp418
-rw-r--r--rlcodegen/ftabcodegen.h72
-rw-r--r--rlcodegen/gendata.cpp563
-rw-r--r--rlcodegen/gendata.h159
-rw-r--r--rlcodegen/gotocodegen.cpp754
-rw-r--r--rlcodegen/gotocodegen.h106
-rw-r--r--rlcodegen/gvdotgen.cpp282
-rw-r--r--rlcodegen/gvdotgen.h55
-rw-r--r--rlcodegen/ipgotocodegen.cpp418
-rw-r--r--rlcodegen/ipgotocodegen.h92
-rw-r--r--rlcodegen/javacodegen.cpp307
-rw-r--r--rlcodegen/javacodegen.h47
-rw-r--r--rlcodegen/main.cpp441
-rw-r--r--rlcodegen/redfsm.cpp535
-rw-r--r--rlcodegen/redfsm.h474
-rw-r--r--rlcodegen/rlcodegen.h162
-rw-r--r--rlcodegen/splitcodegen.cpp518
-rw-r--r--rlcodegen/splitcodegen.h65
-rw-r--r--rlcodegen/tabcodegen.cpp996
-rw-r--r--rlcodegen/tabcodegen.h110
-rw-r--r--rlcodegen/xmlparse.kh119
-rw-r--r--rlcodegen/xmlparse.kl875
-rw-r--r--rlcodegen/xmlparse.y978
-rw-r--r--rlcodegen/xmlscan.lex433
-rw-r--r--rlcodegen/xmlscan.rl333
-rw-r--r--rlcodegen/xmltags.gperf81
-rw-r--r--test/Makefile.in30
-rw-r--r--test/README13
-rw-r--r--test/atoi1.rl69
-rw-r--r--test/atoi2.rl81
-rw-r--r--test/awkemu.rl157
-rw-r--r--test/builtin.rl1209
-rw-r--r--test/call1.rl103
-rw-r--r--test/call2.rl118
-rw-r--r--test/call3.rl123
-rw-r--r--test/clang1.rl283
-rw-r--r--test/clang2.rl324
-rw-r--r--test/clang3.rl321
-rw-r--r--test/cond1.rl68
-rw-r--r--test/cond2.rl91
-rw-r--r--test/cond3.rl59
-rw-r--r--test/cond4.rl54
-rw-r--r--test/cond5.rl59
-rw-r--r--test/cond6.rl61
-rw-r--r--test/cppscan1.h110
-rw-r--r--test/cppscan1.rl282
-rw-r--r--test/cppscan2.rl402
-rw-r--r--test/cppscan3.rl281
-rw-r--r--test/cppscan4.rl303
-rw-r--r--test/cppscan5.rl277
-rw-r--r--test/element1.rl108
-rw-r--r--test/element2.rl84
-rw-r--r--test/element3.rl144
-rw-r--r--test/eofact.h9
-rw-r--r--test/eofact.rl50
-rw-r--r--test/erract1.rl145
-rw-r--r--test/erract2.rl80
-rw-r--r--test/erract3.rl105
-rw-r--r--test/erract4.rl135
-rw-r--r--test/erract5.rl146
-rw-r--r--test/errintrans.rl84
-rw-r--r--test/forder1.rl100
-rw-r--r--test/forder2.rl135
-rw-r--r--test/forder3.rl106
-rw-r--r--test/gotocallret1.rl113
-rw-r--r--test/gotocallret2.rl77
-rw-r--r--test/high1.rl183
-rw-r--r--test/high2.rl104
-rw-r--r--test/high3.rl112
-rw-r--r--test/include1.rl28
-rw-r--r--test/include2.rl52
-rw-r--r--test/java1.rl49
-rw-r--r--test/java2.rl51
-rw-r--r--test/keller1.rl1076
-rwxr-xr-xtest/langtrans_c.sh96
-rw-r--r--test/langtrans_c.txl277
-rwxr-xr-xtest/langtrans_d.sh102
-rw-r--r--test/langtrans_d.txl256
-rwxr-xr-xtest/langtrans_java.sh100
-rw-r--r--test/langtrans_java.txl303
-rw-r--r--test/lmgoto.rl198
-rw-r--r--test/mailbox1.h33
-rw-r--r--test/mailbox1.rl252
-rw-r--r--test/mailbox2.rl173
-rw-r--r--test/mailbox3.rl247
-rw-r--r--test/minimize1.rl83
-rw-r--r--test/patact.rl91
-rw-r--r--test/range.rl76
-rw-r--r--test/repetition.rl293
-rw-r--r--test/rlscan.rl287
-rwxr-xr-xtest/runtests251
-rw-r--r--test/stateact1.rl48
-rw-r--r--test/statechart1.rl102
-rw-r--r--test/strings1.rl195
-rw-r--r--test/strings2.h9
-rw-r--r--test/strings2.rl1349
-rw-r--r--test/testcase.txl177
-rw-r--r--test/tokstart1.rl241
-rw-r--r--test/union.rl189
-rw-r--r--test/xml.rl108
-rw-r--r--test/xmlcommon.rl205
-rw-r--r--version.mk2
252 files changed, 67222 insertions, 0 deletions
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..ec0507b
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,340 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/CREDITS b/CREDITS
new file mode 100644
index 0000000..d0b5355
--- /dev/null
+++ b/CREDITS
@@ -0,0 +1,25 @@
+
+ Ragel State Machine Compiler -- CREDITS
+ =======================================
+
+* Written by Adrian Thurston <thurston@cs.queensu.ca>.
+
+* Objective-C output contributed by Eric Ocean.
+
+* D output and many great ideas contributed by Alan West.
+
+* Conditionals inspired by David Helder.
+
+* Java code generation contributions, bug reports, fixes, test cases
+ and suggestions from Colin Fleming.
+
+* Useful discussion and bug reports from Carlos Antunes.
+
+* Feedback, Packaging, and Fixes provided by:
+
+ Bob Tennent, Robert Lemmen, Tobias Jahn, Cris Bailiff, Buddy Betts, Scott
+ Dixon, Steven Handerson, Michael Somos, Bob Paddock, Istvan Buki, David
+ Drai, Matthias Rahlf, Zinx Verituse, Markus W. Weissmann, Marc Liyanage,
+ Eric Ocean, Alan West, Steven Kibbler, Laurent Boulard, Jon Oberheide,
+ David Helder, Lexington Luthor, Jason Jobe, Colin Fleming, Carlos Antunes,
+ Steve Horne
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..83795a6
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,1213 @@
+For Next Release
+================
+ -The '%when condition' syntax was functioning like '$when condition'. This
+ was fixed.
+
+Ragel 5.16 - Nov 20, 2006
+=========================
+ -Bug fix: the fhold and fexec directives did not function correctly in
+ scanner pattern actions. In this context manipulations of p may be lost or
+ made invalid. To fix this, fexec and fhold now manipulate tokend, which is
+ now always used to update p when the action terminates.
+
+Ragel 5.15 - Oct 31, 2006
+=========================
+ -A language independent test harness was introduced. Test cases can be
+ written using a custom mini-language in the embedded actions. This
+ mini-language is then translated to C, D and Java when generating the
+ language-specific test cases.
+ -Several existing tests have been ported to the language-independent format
+ and a number of new language-independent test cases have been added.
+ -The state-based embedding operators which access states that are not the
+ start state and are not final (the 'middle' states) have changed. They
+ were:
+ <@/ eof action into middle states
+ <@! error action into middle states
+ <@^ local error action into middle states
+ <@~ to-state action into middle states
+ <@* from-state action into middle states
+ They are now:
+ <>/ eof action into middle states
+ <>! error action into middle states
+ <>^ local error action into middle states
+ <>~ to-state action into middle states
+ <>* from-state action into middle states
+ -The verbose form of embeddings using the <- operator has been removed.
+ This syntax was difficult to remember.
+ -A new verbose form of state-based embedding operators has been added.
+ These are like the symbol versions, except they replace the symbols:
+ / ! ^ ~ *
+ with literal keywords:
+ eof err lerr to from
+ -The following words have been promoted to keywords:
+ when eof err lerr to from
+ -The write statement now gets its own lexical scope in the scanner to ensure
+ that commands are passed through as is (not affected by keywords).
+ -Bug fix: in the code generation of fret in scanner actions the adjustment to
+ p that is needed in some cases (dependent on content of patterns) was not
+ happening.
+ -The fhold directive, which decrements p, cannot be permitted in the pattern
+ action of a scanner item because it will not behave consistently. At the end
+ of a pattern action p could be decremented, set to a new value or left
+ alone. This depends on the contents of the scanner's patterns. The user
+ cannot be expected to predict what will happen to p.
+ -Conditions in D code require a cast to the widec type when computing widec.
+ -Like Java, D code also needs if (true) branches for control flow in actions
+ in order to fool the unreachable code detector. This is now abstracted in
+ all code generators using the CTRL_FLOW() function.
+ -The NULL_ITEM value in java code should be -1. This is needed for
+ maintaining tokstart.
+
+Ragel 5.14 - Oct 1, 2006
+========================
+ -Fixed the check for use of fcall in actions embedded within longest match
+ items. It was emitting an error if an item's longest-match action had an
+ fcall, which is allowed. This bug was introduced while fixing a segfault in
+ version 5.8.
+ -A new minimization option was added: MinimizeMostOps (-l). This option
+ minimizes at every operation except on chains of expressions and chains of
+ terms (eg, union and concat). On these chains it minimizes only at the last
+ operation. This makes test cases with many states compile faster, without
+ killing the performance on grammars like strings2.rl.
+ -The -l minimization option was made the default.
+ -Fixes to Java code: Use of the fc value did not work, now fixed. Static data
+ is now declared with the final keyword. Patch from Colin Fleming. Conditions
+ now work when generating Java code.
+ -The option -p was added to rlcodegen which causes printable characters to be
+ printed in GraphViz output. Patch from Colin Fleming.
+ -The "element" keyword no longer exists, removed from vim syntax file.
+ Updated keyword highlighting.
+ -The host language selection is now made in the frontend.
+ -Native host language types are now used when specifying the alphtype.
+ Previously all languages used the set defined by C, and these were mapped to
+ the appropriate type in the backend.
+
+Ragel 5.13 - Sep 7, 2006
+========================
+ -Fixed a careless error which broke Java code generation.
+
+Ragel 5.12 - Sep 7, 2006
+========================
+ -The -o flag did not work in combination with -V. This was fixed.
+ -The split code generation format uses only the required number of digits
+ when writing out the number in the file name of each part.
+ -The -T0, -F0 and -G0 codegens should write out the action list iteration
+ variables only when there are regular, to state or from state actions. The
+ code gens should not use anyActions().
+ -If two states have the same EOF actions, they are written out in the finish
+ routine as one case.
+ -The split and in-place goto formats would sometimes generate _out when it is
+ not needed. This was fixed.
+ -Improved the basic partitioning in the split code gen. The last partition
+ would sometimes be empty. This was fixed.
+ -Use of 'fcall *' was not causing top to be initialized. Fixed.
+ -Implemented a Java backend, specified with -J. Only the table-based format
+ is supported.
+ -Implemented range compression in the frontend. This has no effect on the
+ generated code, however it reduces the work of the backend and any programs
+ that read the intermediate format.
+
+Ragel 5.11 - Aug 10, 2006
+=========================
+ -Added a variable to the configure.in script which allows the building of
+ the parsers to be turned off (BUILD_PARSERS). Parser building is off by
+ default for released versions.
+ -Removed configure tests for bison defines header file. Use --defines=file
+ instead.
+ -Configure script doesn't test for bison, flex and gperf when building of the
+ parsers is turned off.
+ -Removed check for YYLTYPE structure from configure script. Since shipped
+ code will not build parsers by default, we don't need to be as accommodating
+ of other versions of bison.
+ -Added a missing include that showed up with g++ 2.95.3.
+ -Failed configure test for Objective-C compiler is now silent.
+
+Ragel 5.10 - Jul 31, 2006
+=========================
+ -Moved the check for error state higher in the table-based processing loop.
+ -Replaced naive implementations of condition searching with proper ones. In
+ the table-based formats the searching is also table-based. In the directly
+ executed formats the searching is also directly executable.
+ -The minimization process was made aware of conditions.
+ -A problem with the condition implementation was fixed. Previously we were
+ taking pointers to transitions and then using them after a call to
+ outTransCopy, which was a bad idea because they may be changed by the call.
+ -Added test mailbox3.rl which is based on mailbox2.rl but includes conditions
+ for restricting header and message body lengths.
+ -Eliminated the initial one-character backup of p just before resuming
+ execution.
+ -Added the -s option to the frontend for printing statistics. This currently
+ includes just the number of states.
+ -Sped up the generation of the in-place goto-driven (-G2) code style.
+ -Implemented a split version of in-place goto-driven code style. This code
+ generation style is suitable for producing fast implementations of very
+ large machines. Partitioning is currently naive. In the future a
+ high-quality partitioning program will be employed. The flag for accessing
+ this feature is -Pn, where n is the number of partitions.
+ -Converted mailbox1.rl, strings2.rl and cppscan1.rl tests to support the
+ split code generation.
+ -Fixes and updates were made to the runtests script: added -c for compiling
+ only, changed the -me option to -e, and added support for testing the split
+ code style.
+
+Ragel 5.9 - Jul 19, 2006
+========================
+ -Fixed a bug in the include system which caused malformed output from the
+ frontend when the include was made from a multi-line machine spec and the
+ included file ended in a single line spec (or vice versa).
+ -Static data is now const.
+ -Actions which referenced states but were not embedded caused the frontend to
+ segfault, now fixed.
+ -Manual now built with pdflatex.
+ -The manual was reorganized and expanded. Chapter sequence is now:
+ Introduction, Constructing Machines, Embedding Actions, Controlling
+ Nondeterminism and Interfacing to the Host program.
+
+Ragel 5.8 - Jun 17, 2006
+========================
+ -The internal representation of the alphabet type has been encapsulated
+ into a class and all operations on it have been defined as C++ operators.
+ -The condition implementation now supports range transitions. This allows
+ conditions to be embedded into arbitrary machines. Conditions are still
+ experimental.
+ -More condition embedding operators were added
+ 1. Isolate the start state and embed a condition into all transitions
+ leaving it:
+ >when cond OR >?cond
+ 2. Embed a condition into all transitions:
+ when cond OR $when cond OR $?cond
+ 3. Embed a condition into pending out transitions:
+ %when cond OR %?cond
+ -Improvements were made to the determinization process to support pending out
+ conditions.
+ -The Vim syntax file was fixed so that :> doesn't cause the match of a label.
+ -The test suite was converted to a single-file format which uses less disk
+ space than the old directory-per-test format.
+
+Ragel 5.7 - May 14, 2006
+========================
+ -Conditions will not be embedded like actions because they involve a
+ manipulation of the state machine they are specified in. They have therefore
+ been taken out of the verbose action embedding form (using the <- compound
+ symbol). A new syntax for specifying conditions has been created:
+ m = '\n' when {i==4};
+ -Fixed a bug which prevented state machine commands like fcurs, fcall, fret,
+ etc, from being accounted for in from-state actions and to-state actions.
+ This prevented some necessary support code from being generated.
+ -Implemented condition testing in remaining code generators.
+ -Configure script now checks for gperf, which is required for building.
+ -Added support for case-insensitive literal strings (in addition to regexes).
+ A case-insensitive string is made by appending an 'i' to the literal, as in
+ 'cmd'i or "cmd"i.
+ -Fixed a bug which caused all or expressions inside of all regular
+ expressions to be case-insensitive. For example /[fo]o bar/ would make the
+ [fo] part case-insensitive even though no 'i' was given following the
+ regular expression.
+
+Ragel 5.6 - Apr 1, 2006
+=======================
+ -Added a left-guarded concatenation operator. This operator <: is equivalent
+ to ( expr1 $1 . expr2 >0 ). It is useful if you want to prefix a sequence
+ with a sequence of a subset of the characters it matches. For example, one
+ can consume leading whitespace before tokenizing a sequence of whitespace
+ separated words: ( ' '* <: ( ' '+ | [a-z]+ )** )
+ -Removed context embedding code, which has been dead since 5.0.
+
+Ragel 5.5 - Mar 28, 2006
+========================
+ -Implemented a case-insensitive option for regular expressions: /get/i.
+ -If no input file is given to the ragel program it reads from standard input.
+ -The label of the start state has been changed from START to IN to save on
+ required screen space.
+ -Bug fix: \0 was not working in literal strings, due to a change that reduced
+ memory usage by concatenating components of literal strings. Token data
+ length is now passed from the scanner to the parser so that we do not need to
+ rely on null termination.
+
+Ragel 5.4 - Mar 12, 2006
+========================
+ -Eliminated the default transition from the frontend implementation. This
+ default transition was a space-saving optimization that at best could reduce
+ the number of allocated transitions by one half. Unfortunately it
+ complicated the implementation and this stood in the way of introducing
+ conditionals. The default transition may be reintroduced in the future.
+ -Added entry-guarded concatenation. This operator :>, is syntactic sugar
+ for expr1 $0 . expr >1. This operator terminates the matching of the first
+ machine when a first character of the second machine is matched. For
+ example in any* . ';' we never leave the any* machine. If we use any* :> ';'
+ then the any* machine is terminated upon matching the semi-colon.
+ -Added finish-guarded concatenation. This operator :>>, is syntactic sugar
+ for expr1 $0 . expr @1. This operator is like entry guarded concatenation
+ except the first machine is terminated when the second machine enters a
+ final state. This is useful for delaying the guard until a full pattern is
+ matched. For example as in '/*' any* :>> '*/'.
+ -Added strong subtraction. Where regular subtraction removes from the first
+ machine any strings that are matched by the second machine, strong
+ subtraction removes any strings from the first that contain any strings of
+ the second as a substring. Strong subtraction is syntactic sugar for
+ expr1 - ( any* expr2 any* ).
+ -Eliminated the use of priorities from the examples. Replaced with
+ subtraction, guarded concatenation and longest-match kleene star.
+ -Did some initial work on supporting conditional transitions. Far from
+ complete and very buggy. This code will only be active when conditionals are
+ used.
+
+Ragel 5.3 - Jan 27, 2006
+========================
+ -Added missing semi-colons that cause the build to fail when using older
+ versions of Bison.
+ -Fix for D code: if the contents of an fexec is a single word, the generated
+ code will get interpreted as a C-style cast. Adding two brackets prevents
+ this. Can now eliminate the "access this.;" in cppscan5 that was used to
+ get around this problem.
+ -Improved some of the tag names in the intermediate format.
+ -Added unsigned long to the list of supported alphabet types.
+ -Added ids of actions and action lists to XML intermediate format. Makes it
+ more human readable.
+ -Updated to latest Aapl package.
+
+Ragel 5.2 - Jan 6, 2006
+========================
+ -Ragel emits an error if the target of fentry, fcall, fgoto or fnext is inside
+ a longest match operator, or if an action embedding in a longest match
+ machine uses fcall. The fcall command can still be used in pattern actions.
+ -Made improvements to the clang, rlscan, awkemu and cppscan examples.
+ -Some fixes to generated label names: they should all be prefixed with _.
+ -A fix to the Vim syntax highlighting script was made
+ -Many fixes and updates to the documentation. All important features and
+ concepts are now documented. A second chapter describing Ragel's use
+ was added.
+
+Ragel 5.1 - Dec 22, 2005
+========================
+ -Fixes to the matching of section delimiters in Vim syntax file.
+ -If there is a longest match machine, the tokend var is now initialized by
+ write init. This is not necessary for correct functionality, however
+ prevents compiler warnings.
+ -The rlscan example was ported to the longest match operator and changed to
+ emit XML data.
+ -Fix to the error handling in the frontend: if there are errors in the lookup
+ of names at machine generation time then do not emit anything.
+ -If not compiling the full machine in the frontend (by using -M), avoid
+ errors and segfaults caused by names that are not part of the compiled
+ machine.
+ -Longest match bug fix: need to init tokstart when returning from fsm calls
+ that are inside longest match actions.
+ -In Graphviz drawing, the arrow into the start state is not a real
+ transition, do not draw to-state actions on the label.
+ -A bug fix to the handling of non-tag data within an XML tag was made.
+ -Backend exit value fixed: since the parser now accepts nothing so as to
+ avoid a redundant parse error when the frontend dies, we must force an
+ error. The backend should now be properly reporting errors.
+ -The longest match machine now has its start state set final. An LM machine
+ is in a final state when it has not matched anything, when it has matched
+ and accepted a token and is ready for another, and when it has matched a
+ token but is waiting for some lookahead before determining what to do about
+ it (similar to kleene star).
+ -Element statement removed from some tests.
+ -Entry point names are propagated to the backend and used to label the entry
+ point arrows in Graphviz output.
+
+Ragel 5.0 - Dec 17, 2005
+========================
+ (additional details in V5 release notes)
+ -Ragel has been split into two executables: A frontend which compiles
+ machines and emits them in an XML format, and a backend which generates code
+ or a Graphviz dot file from the XML input. The purpose of this split is to
+ allow Ragel to interface with other tools by means of the XML intermediate
+ format and to reduce complexity by strictly separating the previously
+ entangled phases. The intermediate format will provide a better platform
+ for inspecting compiled machines and for extending Ragel to support other host
+ languages.
+ -The host language interface has been reduced significantly. Ragel no longer
+ expects the machine to be implemented as a structure or class and does not
+ generate functions corresponding to initialization, execution and EOF.
+ Instead, Ragel just generates the code of these components, allowing all of
+ them to be placed in a single function if desired. The user specifies a
+ machine in the usual manner, then indicates at which place in the program
+ text the state machine code is to be generated. This is done using the write
+ statement. It is possible to specify to Ragel how it should access the
+ variables it needs (such as the current state) using the access statement.
+ -The host language embedding delimiters have been changed. Single line
+ machines start with '%%' and end at newline. Multiline machines start with
+ '%%{' and end with '}%%'. The machine name is given with the machine
+ statement at the very beginning of the specification. The purpose of this
+ change is to make it easier to separate Ragel code from the host language. This
+ will ease the addition of supported host languages.
+ -The structure and class parsing which was previously able to extract a
+ machine's name has been removed since this feature is dependent on the host
+ language and inhibits the move towards a more language-independent frontend.
+ -The init, element and interface statements have been made obsolete by the
+ new host language interface and have been removed.
+ -The fexec action statement has been changed to take only the new position to
+ move to. This statement is more useful for moving backwards and reparsing
+ input than for specifying a whole new buffer entirely and has been shifted
+ to this new use. Giving it only one argument also simplifies the parsing of
+ host code embedded in a Ragel specification. This will ease the addition of
+ supported host languages.
+ -Introduced the fbreak statement, which allows one to stop processing data
+ immediately. The machine ends up in the state that the current transition
+ was to go to. The current character is not changed.
+ -Introduced the noend option for writing the execute code. This inhibits
+ checking if we have reached pe. The machine will run until it goes into the
+ error state or fbreak is hit. This allows one to parse null-terminated
+ strings without first computing the length.
+ -The execute code now breaks out of the processing loop when it moves into
+ the error state. Previously it would run until pe was hit. Breaking out
+ makes the noend option useful when an error is encountered and allows
+ user code to determine where in the input the error occurred. It also
+ eliminates needlessly iterating the input buffer.
+ -Introduced the noerror, nofinal and noprefix options for writing the machine
+ data. The first two inhibit the writing of the error state and the
+ first-final state should they not be needed. The noprefix eliminates the
+ prefixing of the data items with the machine name.
+ -Support for the D language has been added. This is specified in the backend
+ with the -D switch.
+ -Since the new host language interface has been reduced considerably, Ragel
+ no longer needs to distinguish between C-based languages. Support for C, C++
+ and Objective-C has been folded into one option in the backend: -C
+ -The code generator has been made independent of the languages that it
+ supports by pushing the language dependent aspects down into the lower
+ levels of the code generator.
+ -Many improvements to the longest match construction were made. It is no
+ longer considered experimental. A longest match machine must appear at the
+ top level of a machine instantiation. Since it does not generate a pure
+ state machine (it may need to backtrack), it cannot be used as an operand to
+ other operators.
+ -References to the current character and current state are now completely
+ banned in EOF actions.
+
+Ragel 4.2 - Sep 16, 2005
+========================
+ (additional details in V4 release notes)
+ -Fixed a bug in the longest match operator. In some states it's possible that
+ we either match a token or match nothing at all. In these states we need to
+ consult the LmSwitch on error so it must be prepared to execute an error
+ handler. We therefore need to init act to this error value (which is zero).
+ We can compute if we need to do this and the code generator emits the
+ initialization only if necessary.
+ -Changed the definition of the token end of longest match actions. It now
+ points to one past the last token. This makes computing the token length
+ easier because you don't have to add one. The longest match variables token
+ start, action identifier and token end are now properly initialized in
+ generated code. They don't need to be initialized in the user's code.
+ -Implemented to-state and from-state actions. These actions are executed on
+ transitions into the state (after the in transition's actions) and on
+ transitions out of the state (before the out transition's actions). See V4
+ release notes for more information.
+ -Since there are no longer any action embedding operators that embed both on
+ transitions and on EOF, any actions that exist in both places will be there
+ because the user has explicitly done so. Presuming this case is rare, and
+ with code duplication in the hands of the user, we therefore give the EOF
+ actions their own action switch in the finish() function. This is further
+ motivated by the fact that the best solution is to do the same for to-state
+ and from-state actions in the main loop.
+ -Longest match actions can now be specified using a named action. Since a
+ word following a longest match item conflicts with the concatenation of a
+ named machine, the => symbol must come immediately before a named action.
+ -The longest match operator permits action and machine definitions in the
+ middle of a longest match construction. These are parsed as if they came
+ before the machine definition they are contained in. Permitting action and
+ machine definitions in a longest match construction allows objects to be
+ defined closer to their use.
+ -The longest match operator can now handle longest match items with no
+ action, where previously Ragel segfaulted.
+ -Updated to Aapl post 2.12.
+ -Fixed a bug in epsilon transition name lookups. After doing a name lookup
+ the result was stored in the parse tree. This is wrong because if a machine
+ is used more than once, each time it may resolve to different targets,
+ however it will be stored in the same place. We now store name resolutions
+ in a separated data structure so that each walk of a parse tree uses the
+ name resolved during the corresponding walk in the name lookup pass.
+ -The operators used to embed context and actions into states have been
+ modified. The V4 release notes contain the full details.
+ -Added zlen builtin machine to represent the zero length machine. Eventually
+ the name "null" will be phased out in favour of zlen because it is unclear
+ whether null matches the zero length string or if it does not match any
+ string at all (as does the empty builtin).
+ -Added verbose versions of action, context and priority embedding. See the V4
+ release notes for the full details. A small example:
+ machine <- all exec { foo(); } <- final eof act1
+ -Bugfix for machines with epsilon ops, but no join operations. I had
+ wrongfully assumed that because epsilon ops can only increase connectivity,
+ that no states are ever merged and therefore a call to fillInStates() is not
+ necessary. In reality, epsilon transitions within one machine can induce the
+ merging of states. In the following, state 2 follows two paths on 'i':
+ main := 'h' -> i 'i h' i: 'i';
+ -Changed the license of the guide from a custom "do not propagate modified
+ versions of this document" license to the GPL.
+
+Ragel 4.1 - Jun 26, 2005
+========================
+ (additional details in V4 release notes)
+ -A bug in include processing was fixed. Surrounding code in an include file
+ was being passed through to the output when it should be ignored. Includes
+ are only for including portions of another machine into the current. This
+ went unnoticed because all tested includes were wrapped in #ifndef ...
+ #endif directives and so did not affect the compilation of the file making
+ the include.
+ -Fixes were made to Vim syntax highlighting file.
+ -Duplicate actions are now removed from action lists.
+ -The character-level negation operator ^ was added. This operator produces a
+ machine that matches single characters that are not matched by the machine
+ it is applied to. This unary prefix operator has the same precedence level
+ as !.
+ -The use of + to specify a positive literal number was discontinued.
+ -The parser now assigns the subtraction operator a higher precedence than
+ the negation of literal number.
+
+Ragel 4.0 - May 26, 2005
+========================
+ (additional details in V4 release notes)
+ -Operators now strictly embed into a machine either on a specific class of
+ characters or on EOF, but never both. This gives a cleaner association
+ between the operators and the physical state machine entities they operate
+ on. This change is made up of several parts:
+ 1. '%' operator embeds only into leaving characters.
+ 2. All global and local error operators only embed on error character
+ transitions, their action will not be triggered on EOF in non-final
+ states.
+ 3. EOF action embedding operators have been added for all classes of states
+ to make up for functionality removed from other operators. These are
+ >/ $/ @/ %/.
+ 4. Start transition operator '>' no longer implicitly embeds into leaving
+ transitions when start state is final.
+ -Ragel now emits warnings about the improper use of statements and values in
+ action code that is embedded as an EOF action. Warnings are emitted for fpc,
+ fc, fexec, fbuf and fblen.
+ -Added a longest match construction operator |* machine opt-action; ... *|.
+ This is for repetition where an ability to revert to a shorter, previously
+ matched item is required. This is the same behaviour as flex and re2c. The
+ longest match operator is not a pure FSM construction, it introduces
+ transitions that implicitly hold the current character or reset execution to
+ a previous location in the input. Use of this operator requires the caller
+ of the machine to occasionally hold onto data after a call to the execute
+ routine. Use of machines generated with this operator as the input to other
+ operators may have undefined results. See examples/cppscan for an example.
+ This is very experimental code.
+ -Action ids are only assigned to actions that are referenced in the final
+ constructed machine, preventing gaps in the action id sequence. Previously
+ an action id was assigned if the action was referenced during parsing.
+ -Machine specifications now begin with %% and are followed with an optional
+ name and either a single Ragel statement or a sequence of statements
+ enclosed in {}.
+ -Ragel no longer generates the FSM's structure or class. It is up to the user
+ to declare the structure and to give it a variable named curs of type
+ integer. If the machine uses the call stack the user must also declare an
+ array of integers named stack and an integer variable named top.
+ -In the case of Objective-C, Ragel no longer generates the interface or
+ implementation directives, allowing the user to declare additional methods.
+ -If a machine specification does not have a name then Ragel tries to find a
+ name for it by first checking if the specification is inside a struct, class
+ or interface. If it is not then it uses the name of the previous machine
+ specification. If still no name is found then this is an error.
+ -Fsm specifications now persist in memory and statements accumulate.
+ -Ragel now has an include statement for including the statements of a machine
+ spec in another file (perhaps because it is the corresponding header file).
+ The include statement can also be used to draw in the statements of another
+ fsm spec in the current file.
+ -The fstack statement is now obsolete and has been removed.
+ -A new statement, simply 'interface;', indicates that ragel should generate
+ the machine's interface. If Ragel sees the main machine it generates the
+ code sections of the machine. Previously, the header portion was generated
+ if the (now removed) struct statement was found and code was generated if
+ any machine definition was found.
+ -Fixed a bug in the resolution of fsm name references in actions. The name
+ resolution code did not recurse into inline code items with children
+ (fgoto*, fcall*, fnext*, and fexec), causing a segfault at code generation
+ time.
+ -Cleaned up the code generators. FsmCodeGen was made into a virtual base
+ class allowing for the language/output-style specific classes to inherit
+ both a language specific and style-specific base class while retaining only
+ one copy of FsmCodeGen. Language specific output can now be moved into the
+ language specific code generators, requiring less duplication of code in the
+ language/output-style specific leaf classes.
+ -Fixed bugs in fcall* implementation of IpgGoto code generation.
+ -If the element type has not been defined Ragel now uses a constant version
+ of the alphtype, not the exact alphtype. In most cases the data pointer of
+ the execute routine should be const. A non-const element type can still be
+ defined with the element statement.
+ -The fc special value now uses getkey for retrieving the current char rather
+ than *_p, which is wrong if the element type is a structure.
+ -User guide converted to TeX and updated for new 4.0 syntax and semantics.
+
+Ragel 3.7 - Oct 31, 2004
+========================
+ -Bug fix: unreferenced machine instantiations causing segfault due to name
+ tree and parse tree walk becoming out of synchronization.
+ -Rewrote representation of inline code blocks using a tree data structure.
+ This allows special keywords such as fbuf to be used as the operands of
+ other fsm commands.
+ -Documentation updates.
+ -When deciding whether or not to generate machine instantiations, search the
+ entire name tree beneath the instantiation for references, not just the
+ root.
+ -Removed stray ';' in keller2.rl
+ -Added fexec for restarting the machine with new buffer data (state stays the
+ same), fbuf for retrieving the start of the buf, and fblen for
+ retrieving the orig buffer length.
+ -Implemented test/cppscan2 using fexec. This allows token emitting and restart
+ to stay inside the execute routine, instead of leaving and re-entering on
+ every token.
+ -Changed examples/cppscan to use fexec and thereby go much faster.
+ -Implemented flex and re2c versions of examples/cppscan. Ragel version
+ goes faster than flex version but not as fast as re2c version.
+ -Merged in Objective-C patch from Eric Ocean.
+ -Turned off syncing with stdio in C++ tests to make them go faster.
+ -Renamed C++ code generation classes with the Cpp Prefix instead of CC to make
+ them easier to read.
+ -In the finish function emit fbuf as 0 cast to a pointer to the element type
+ so its type is not interpreted as an integer.
+ -The number -128 underflows char alphabets on some architectures. Removed
+ uses of it in tests.
+ -Disabled the keller2 test because it causes problems on many architectures
+ due to its large size and compilation requirements.
+
+Ragel 3.6 - Jul 10, 2004
+========================
+ -Many documentation updates.
+ -When resolving names, return a set of values so that a reference in an
+ action block that is embedded more than once won't report distinct entry
+ points that are actually the same.
+ -Implemented flat tables. Stores a linear array of indices into the
+ transition array and only a low and high key value. Faster than binary
+ searching for keys but not usable for large alphabets.
+ -Fixed bug in deleting of transitions leftover from conversion from bst to
+ list implementation of transitions. Other code cleanup.
+ -In table based output calculate the cost of using an index. Don't use if
+ cheaper.
+ -Changed fstate() value available in init and action code to fentry() to
+ reflect the fact that the values returned are intended to be used as targets
+ in fgoto, fnext and fcall statements. The returned state is not a unique
+ state representing the label. There can be any number of states representing
+ a label.
+ -Added keller2 test, C++ scanning tests and C++ scanning example.
+ -In table based output split up transitions into targets and actions. This
+ allows actions to be omitted.
+ -Broke the components of the state array into separate arrays. Requires
+ adding some fields where they could previously be omitted, however allows
+ finer grained control over the sizes of items and an overall size reduction.
+ Also means that state numbers are not an offset into the state array but
+ instead a sequence of numbers, meaning the context array does not have any
+ wasted bits.
+ -Action lists and transition also have their types chosen to be the smallest
+ possible for accommodating the contained values.
+ -Changed curs state stored in fsm struct from _cs to curs. Keep fsm->curs ==
+ -1 while in machine. Added tests curs1 and curs2.
+ -Implemented the notion of context. Context can be embedded in states using
+ >:, $:, @: and %: operators. These embed a named context into start states,
+ all states, non-start/non-final and final states. If the context is declared
+ using a context statement
+ context name;
+ then the context can be queried for any state using fsm_name_ctx_name(state)
+ in C code and fsm_name::ctx_name(state) in C++ code. This feature makes it
+ possible to determine what "part" of the machine is currently active.
+ -Fixed crash on machine generation of graphs with no final state. If there
+ is no reference to a final state in a join operation, don't generate one.
+ -Updated Vim syntax: added labels to inline code, added various C++ keywords.
+ Don't highlight name separations as labels. Added switch labels, improved
+ alphtype, element and getkey.
+ -Fixed line info in error reporting of bad epsilon trans.
+ -Fixed fstate() for tab code gen.
+ -Removed references to malloc.h.
+
+Ragel 3.5 - May 29, 2004
+========================
+ -When parse errors occur, the partially generated output file is deleted and
+ a non-zero exit status is returned.
+ -Updated Vim syntax file.
+ -Implemented the setting of the element type that is passed to the execute
+ routine as well as method for specifying how ragel should retrieve the key
+ from the element type. This lets ragel process arbitrary structures inside
+ of which is the key that is parsed.
+ element struct Element;
+ getkey fpc->character;
+ -The current state is now implemented with an int across all machines. This
+ simplifies working with current state variables. For example this allows a
+ call stack to be implemented in user code.
+ -Implemented a method for retrieving the current state, the target state, and
+ any named states.
+ fcurs -retrieve the current state
+ ftargs -retrieve the target state
+ fstate(name) -retrieve a named state.
+ -Implemented a mechanism for jumping to and calling to a state stored in a
+ variable.
+ fgoto *<expr>; -goto the state returned by the C/C++ expression.
+ fcall *<expr>; -call the state returned by the C/C++ expression.
+ -Implemented a mechanism for specifying the next state without immediately
+ transferring control there (any code following statement is executed).
+ fnext label; -set the state pointed to by label as the next state.
+ fnext *<expr>; -set the state returned by the C/C++ expression as the
+ next.
+ -Action references are determined from the final machine instead of during
+ the parse tree walk. Some actions can be referenced in the parse tree but not
+ show up in the final machine. Machine analysis is now done based on this new
+ computation.
+ -Named state lookup now employs a breadth-first search in the lookup and
+ allows the user to fully qualify names, making it possible to specify
+ jumps/calls into parts of the machine deep in the name hierarchy. Each part
+ of name (separated by ::) employs a breadth first search from its starting
+ point.
+ -Name references now must always refer to a single state. Since references to
+ multiple states is not normally intended, it no longer happens
+ automatically. This frees the programmer from thinking about whether or not
+ a state reference is unique. It also avoids the added complexity of
+ determining when to merge the targets of multiple references. The effect of
+ references to multiple states can be explicitly created using the join
+ operator and epsilon transitions.
+ -M option was split into -S and -M. -S specifies the machine spec to generate
+ for graphviz output and dumping. -M specifies the machine definition or
+ instantiation.
+ -Machine function parameters are now prefixed with an underscore to
+ avoid the hiding of class members.
+
+Ragel 3.4 - May 8, 2004
+=======================
+ -Added the longest match kleene star operator **, which is synonymous
+ with ( ( <machine> ) $0 %1 ) *.
+ -Epsilon operators distinguish between leaving transitions (going to
+ another expression in a comma separated list) and non-leaving transitions.
+ Leaving actions and priorities are appropriately transferred.
+ -Relative priority of following ops changed to:
+ 1. Action/Priority
+ 2. Epsilon
+ 3. Label
+ If label is done first then the isolation of the start state in > operators
+ will cause the label to point to the old start state that doesn't have the
+ new action/priority.
+ -Merged >! and >~, @! and @~, %! and %~, and $! and $~ operators to have one
+ set of global error action operators (>!, @!, %! and $!) that are invoked on
+ error by unexpected characters as well as by unexpected EOF.
+ -Added the fpc keyword for use in action code. This is a pointer to the
+ current character. *fpc == fc. If an action is invoked on EOF then fpc == 0.
+ -Added >^, @^, %^, and $^ local error operators. Global error operators (>!,
+ @!, $!, and %!) cause actions to be invoked if the final machine fails.
+ Local error actions cause actions to be invoked if the current machine
+ fails.
+ -Changed error operators to mean embed global/local error actions in:
+ >! and >^ -the start state.
+ @! and @^ -states that are not the start state and are not final.
+ %! and %^ -final states.
+ $! and $^ -all states.
+ -Added >@! which is synonymous >! then @!
+ -Added >@^ which is synonymous >^ then @^
+ -Added @%! which is synonymous @! then %!
+ -Added @%^ which is synonymous @^ then %^
+ -FsmGraph representation of transition lists was changed from a mapping of
+ alphabet key -> transition objects using a BST to simply a list of
+ transition objects. Since the transitions are no longer divided by
+ single/range, the fast finding of transition objects by key is no longer
+ required functionality and can be eliminated. This new implementation uses
+ the same amount of memory however causes less allocations. It also makes more
+ sense for supporting error transitions with actions. Previously an error
+ transition was represented by a null value in the BST.
+ -Regular expression ranges are checked to ensure that lower <= upper.
+ -Added printf-like example.
+ -Added atoi2, erract2, and gotcallret to the test suite.
+ -Improved build test to support make -jN and simplified the compiling and
+ running of tests.
+
+Ragel 3.3 - Mar 7, 2004
+=========================
+ -Portability bug fixes were made. Minimum and maximum integer values are
+ now taken from the system. An alignment problem on 64bit systems
+ was fixed.
+
+Ragel 3.2 - Feb 28, 2004
+========================
+ -Added a Vim syntax file.
+ -Eliminated length var from generated execute code in favour of an end
+ pointer. Using length requires two variables be read and written. Using an
+ end pointer requires one variable read and written and one read. Results in
+ more optimizable code.
+ -Minimization is now on by default.
+ -States are ordered in output by depth first search.
+ -Bug in minimization fixed. States were not being distinguished based on
+ error actions.
+ -Added null and empty builtin machines.
+ -Added EOF error action operators. These are >~, >@, $~, and %~. EOF error
+ operators embed actions to take if the EOF is seen and interpreted as an
+ error. The operators correspond to the following states:
+ -the start state
+ -any state with a transition to a final state
+ -any state with a transition out
+ -a final state
+ -Fixed bug in generation of unreferenced machine vars using -M. Unreferenced
+ vars don't have a name tree built underneath when starting from
+ instantiations. Need to instead build the name tree starting at the var.
+ -Calls, returns, holds and references to fc in out action code are now
+ handled for ipgoto output.
+ -Only actions referenced by an instantiated machine expression are put into
+ the action index and written out.
+ -Added rlscan, an example that lexes Ragel input.
+
+Ragel 3.1 - Feb 18, 2004
+========================
+ -Duplicates in OR literals are removed and no longer cause an assertion
+ failure.
+ -Duplicate entry points used in goto and call statements are made into
+ deterministic entry points.
+ -Base FsmGraph code moved from aapl into ragel, as an increasing amount
+ of specialization is required. Too much time was spent attempting to
+ keep it as a general purpose template.
+ -FsmGraph code de-templatized and hierarchy squashed to a single class.
+ -Single transitions taken out of FsmGraph code. In the machine construction
+ stage, transitions are now implemented only with ranges and default
+ transitions. This reduces memory consumption, simplifies code and prevents
+ covered transitions. However it requires the automated selection of single
+ transitions to keep goto-driven code lean.
+ -Machine reduction completely rewritten to be in-place. As duplicate
+ transitions and actions are found and the machine is converted to a format
+ suitable for writing as C code or as GraphViz input, the memory allocated
+ for states and transitions is reused, instead of newly allocated.
+ -New reduction code consolidates ranges, selects a default transition, and
+ selects single transitions with the goal of joining ranges that are split by
+ any number of single characters.
+ -Line directive changed from "# <num> <file>" to the more common format
+ "#line <num> <file>".
+ -Operator :! changed to @!. This should have happened in last release.
+ -Added params example.
+
+Ragel 3.0 - Jan 22, 2004
+========================
+ -Ragel now parses the contents of struct statements and action code.
+ -The keyword fc replaces the use of *p to reference the current character in
+ action code.
+ -Machine instantiations other than main are allowed.
+ -Call, jump and return statements are now available in action code. This
+ facility makes it possible to jump to an error handling machine, call a
+ sub-machine for parsing a field or to follow paths through a machine as
+ determined by arbitrary C code.
+ -Added labels to the language. Labels can be used anywhere in a machine
+ expression to define an entry point. Also references to machine definitions
+ cause the implicit creation of a label.
+ -Added epsilon transitions to the language. Epsilon operators may reference
+ labels in the current name scope resolved when join operators are evaluated
+ and at the root of the expression tree of machine assignment/instantiation.
+ -Added the comma operator, which joins machines together without drawing any
+ transitions between them. This operator is useful in combination with
+ labels, the epsilon operator and user code transitions for defining machines
+ using the named state and transition list paradigm. It is also useful for
+ invoking transitions based on some analysis of the input or on the
+ environment.
+ -Added >!, :!, $!, %! operators for specifying actions to take should the
+ machine fail. These operators embed actions to execute if the machine
+ fails in
+ -the start state
+ -any state with a transition to a final state
+ -any state with a transition out
+ -a final state
+ The general rule is that if an action embedding operator embeds an action
+ into a set of transitions T, then the error-counterpart with a ! embeds an
+ action into the error transition taken when any transition T is a candidate,
+ but does not match the input.
+ -The finishing augmentation operator ':' has been changed to '@'. This
+ frees the ':' symbol for machine labels and avoids hacks to the parser to
+ allow the use of ':' for both labels and finishing augmentations. The best
+ hack required that label names be distinct from machine definition names as
+ in main := word : word; This restriction is not good because labels are
+ local to the machine that they are used in whereas machine names are global
+ entities. Label name choices should not be restricted by the set of names
+ that are in use for machines.
+ -Named priority syntax now requires parentheses surrounding the name and
+ value pair. This avoids grammar ambiguities now that the ',' operator has
+ been introduced and makes it more clear that the name and value are an
+ associated pair.
+ -Backslashes are escaped in line directive paths.
+
+Ragel 2.2 - Oct 6, 2003
+=======================
+ -Added {n}, {,n}, {n,} {n,m} repetition operators.
+ <expr> {n} -- exactly n repetitions
+ <expr> {,n} -- zero to n repetitions
+ <expr> {n,} -- n or more repetitions
+ <expr> {n,m} -- n to m repetitions
+ -Bug in binary search table in Aapl fixed. Fixes crashing on machines that
+ add to action tables that are implicitly shared among transitions.
+ -Tests using obsolete minimization algorithms are no longer built and run by
+ default.
+ -Added atoi and concurrent from examples to the test suite.
+
+Ragel 2.1 - Sep 22, 2003
+========================
+ -Bug in priority comparison code fixed. Segfaulted on some input with many
+ embedded priorities.
+ -Added two new examples.
+
+Ragel 2.0 - Sep 7, 2003
+=======================
+ -Optional (?), One or More (+) and Kleene Star (*) operators changed from
+ prefix to postfix. Rationale is that postfix version is far more common in
+ regular expression implementations and will be more readily understood.
+ -All priority values attached to transitions are now accompanied by a name.
+ Transitions no longer have default priority values of zero assigned
+ to them. Only transitions that have different priority values assigned
+ to the same name influence the NFA-DFA conversion. This scheme reduces
+ side-effects of priorities.
+ -Removed the %! statement for unsetting pending out priorities. With
+ named priorities, it is not necessary to clear the priorities of a
+ machine with $0 %! because non-colliding names can be used to avoid
+ side-effects.
+ -Removed the clear keyword, which was for removing actions from a machine.
+ Not required functionality and it is non-intuitive to have a language
+ feature that undoes previous definitions.
+ -Removed the ^ modifier to repetition and concatenation operators. This
+ undocumented feature prevented out transitions and out priorities from being
+ transferred from final states to transitions leaving machines. Not required
+ functionality and complicates the language unnecessarily.
+ -Keyword 'func' changed to 'action' as a part of the phasing out of the term
+ 'function' in favour of 'action'. Rationale is that the term 'function'
+ implies that the code is called like a C function, which is not necessarily
+ the case. The term 'action' is far more common in state machine compiler
+ implementations.
+ -Added the instantiation statement, which looks like a standard variable
+ assignment except := is used instead of =. Instantiations go into the
+ same graph dictionary as definitions. In the future, instantiations
+ will be used as the target for gotos and calls in action code.
+ -The main graph should now be explicitly instantiated. If it is not,
+ a warning is issued.
+ -Or literal basic machines ([] outside of regular expressions) now support
+ negation and ranges.
+ -C and C++ interfaces lowercased. In the C interface an underscore now
+ separates the fsm machine and the function name. Rationale is that lowercased
+ library and generated routines are more common.
+ C output:
+ int fsm_init( struct clang *fsm );
+ int fsm_execute( struct clang *fsm, char *data, int dlen );
+ int fsm_finish( struct clang *fsm );
+ C++ output:
+ int fsm::init( );
+ int fsm::execute( char *data, int dlen );
+ int fsm::finish( );
+ -Init, execute and finish all return -1 if the machine is in the error state
+ and can never accept, 0 if the machine is in a non-accepting state that has a
+ path to a final state and 1 if the machine is in an accepting state.
+ -Accept routine eliminated. Determining whether or not the machine accepts is
+ done by examining the return value of the finish routine.
+ -In C output, fsm structure is no longer a typedef, so referencing requires
+ the struct keyword. This is to stay in line with C language conventions.
+ -In C++ output, constructor is no longer written by ragel. As a consequence,
+ init routine is not called automatically. Allows constructor to be supplied
+ by user as well as the return value of init to be examined without calling it
+ twice.
+ -Static start state and private structures are taken out of C++ classes.
+
+Ragel 1.5.4 - Jul 14, 2003
+==========================
+ -Workaround for building with bison 1.875, which produces an
+ optimization that doesn't build with newer version gcc.
+
+Ragel 1.5.3 - Jul 10, 2003
+==========================
+ -Fixed building with versions of flex that recognize YY_NO_UNPUT.
+ -Fixed version numbers in ragel.spec file.
+
+Ragel 1.5.2 - Jul 7, 2003
+=========================
+ -Transition actions and out actions displayed in the graphviz output.
+ -Transitions on negative numbers handled in graphviz output.
+ -Warning generated when using bison 1.875 now squashed.
+
+Ragel 1.5.1 - Jun 21, 2003
+==========================
+ -Bugs fixed: Don't delete the output objects when writing to standard out.
+ Copy mem into parser buffer with memcpy, not strcpy. Fixes buffer mem error.
+ -Fixes for compiling with Sun WorkShop 6 compilers.
+
+Ragel 1.5.0 - Jun 10, 2003
+==========================
+ -Line directives written to the output so that errors in the action code
+ are properly reported in the ragel input file.
+ -Simple graphviz dot file output format is supported. Shows states and
+ transitions. Does not yet show actions.
+ -Options -p and -f dropped in favour of -d output format.
+ -Added option -M for specifying the machine to dump with -d or the graph to
+ generate with -V.
+ -Error recovery implemented.
+ -Proper line and column number tracking implemented in the scanner.
+ -All action/function code is now embedded in the main Execute routine. Avoids
+ duplication of action code in the Finish routine and the need to call
+ ExecFuncs which resulted in huge code bloat. Will also allow actions to
+ modify cs when fsm goto, call and return is supported in action code.
+ -Fsm spec can have no statements, nothing will be generated.
+ -Bug fix: Don't accept ] as the opening of a .-. range in a reg exp.
+ -Regular expression or set ranges (ie /[0-9]/) are now handled by the parser
+ and consequently must be well-formed. The following now generates a parser
+ error: /[+-]/ and must be rewritten as /[+\-]/. Also fixes a bug whereby ]
+ might be accepted as the opening of a .-. range causing /[0-9]-[0-9]/ to
+ parse incorrectly.
+ -\v, \f, and \r are now treated as whitespace in an fsm spec.
+
+Ragel 1.4.1 - Nov 19, 2002
+==========================
+ -Compile fixes. The last release (integer alphabets) was so exciting
+ that usual portability checks got bypassed.
+
+Ragel 1.4.0 - Nov 19, 2002
+==========================
+ -Arbitrary integer alphabets are now fully supported! A new language
+ construct:
+ 'alphtype <type>' added for specifying the type of the alphabet. Default
+ is 'char'. Possible alphabet types are:
+ char, unsigned char, short, unsigned short, int, unsigned int
+ -Literal machines specified in decimal format can now be negative when the
+ alphabet is a signed type.
+ -Literal machines (strings, decimal and hex) have their values checked for
+ overflow/underflow against the size of the alphabet type.
+ -Table driven and goto driven output redesigned to support ranges. Table
+ driven uses a binary search for locating single characters and ranges. Goto
+ driven uses a switch statement for single characters and nested if blocks for
+ ranges.
+ -Switch driven output removed due to a lack of consistent advantages. Most of
+ the time the switch driven FSM is of no use because the goto FSM makes
+ smaller and faster code. Under certain circumstances it can produce smaller
+ code than a goto driven fsm and be almost as fast, but some sporadic case
+ does not warrant maintaining it.
+ -Many warnings changed to errors.
+ -Added option -p for printing the final fsm before minimization. This lets
+ priorities be seen. Priorities are all reset to 0 before minimization. The
+ existing option -f prints the final fsm after minimization.
+ -Fixed a bug in the clang test and example that resulted in redundant actions
+ being executed.
+
+Ragel 1.3.4 - Nov 6, 2002
+=========================
+ -Fixes to Chapter 1 of the guide.
+ -Brought back the examples and made them current.
+ -MSVC is no longer supported for compiling windows binaries because its
+ support for the C++ standard is frustratingly inadequate, it will cost money
+ to upgrade if it ever gets better, and MinGW is a much better alternative.
+ -The build system now supports the --host= option for building ragel
+ for another system (used for cross compiling a windows binary with MinGW).
+ -Various design changes and fixes towards the goal of arbitrary integer
+ alphabets and the handling of larger state machines were made.
+ -The new shared vector class is now used for action lists in transitions and
+ states to reduce memory allocations.
+ -An avl tree is now used for the reduction of transitions and functions of an
+ fsm graph before making the final machine. The tree allows better scalability
+ and performance by not requiring consecutively larger heap allocations.
+ -Final stages in the separation of fsm graph code from action embedding and
+ priority assignment is complete. Makes the base graph leaner and easier to reuse
+ in other projects (like Keller).
+
+Ragel 1.3.3 - Oct 22, 2002
+==========================
+ -More diagrams were added to section 1.7.1 of the user guide.
+ -FSM Graph code was reworked to separate the regex/nfa/minimization graph
+ algorithms from the manipulation of state and transition properties.
+ -An rpm spec file from Cris Bailiff was added. This allows an rpm for ragel
+ to be built with the command 'rpm -ta ragel-x.x.x.tar.gz'
+ -Fixes to the build system and corresponding doc updates in the README.
+ -Removed autil and included the one needed source file directly in the top
+ level ragel directory.
+ -Fixed a bug that nullified the 20 times speedup in large compilations
+ claimed by the last version.
+ -Removed awk from the doc build (it was added with the last release -- though
+ not mentioned in the changelog).
+ -Install of man page was moved to the doc dir. The install also installs the
+ user guide to $(PREFIX)/share/doc/ragel/
+
+Ragel 1.3.2 - Oct 16, 2002
+==========================
+ -Added option -v (or --version) to show version information.
+ -The subtract operator no longer removes transition data from the machine
+ being subtracted. This is left up to the user for the purpose of making it
+ possible to transfer transitions using subtract and also for speeding up the
+ subtract routine. Note that it is possible to explicitly clear transition
+ data before doing a subtract.
+ -Rather severe typo bug fixed. Bug was related to transitions with higher
+ priorities taking precedence. A wrong ptr was being returned. It appears to
+ have worked most of the time because the old ptr was deleted and the new one
+ allocated immediately after so the old ptr often pointed to the same space.
+ Just luck though.
+ -Bug in the removing of dead end paths was fixed. If the start state
+ has in transitions then those paths were not followed when finding states to
+ keep. Would result in non-dead end states being removed from the graph.
+ -In lists and in ranges are no longer maintained as a bst with the key as the
+ alphabet character and the value as a list of transitions coming in on that
+ char. There is one list for each of inList, inRange and inDefault. Now that
+ the required functionality of the graph is well known it is safe to remove
+ these lists to gain in speed and footprint. They shouldn't be needed.
+ -IsolateStartState() runs on modification of start data only if the start
+ state is not already isolated, which is now possible with the new in list
+ representation.
+ -Concat, Or and Star operators now use an approximation to
+ removeUnreachableStates that does not require a traversal of the entire
+ graph. This combined with an 'on-the-fly' management of final bits and final
+ state status results in a dramatic speed increase when compiling machines
+ that use those operators heavily. The strings2 test goes 20 times faster.
+ -Before the final minimization, after all fsm operations are complete,
+ priority data is reset which enables better minimization in cases where
+ priorities would otherwise separate similar states.
+
+Ragel 1.3.1 - Oct 2, 2002
+=========================
+ -Range transitions are now used to implement machines made with /[a-z]/ and
+ the .. operator as well as most of the builtin machines. The ranges are not
+ yet reflected in the output code, they are expanded as if they came from the
+ regular single transitions. This is one step closer to arbitrary integer
+ output.
+ -The builtin machine 'any' was added. It is equiv to the builtin extend,
+ matching any characters.
+ -The builtin machine 'cntrl' now includes newline.
+ -The builtin machine 'space' now includes newline.
+ -The builtin machine 'ascii' is now the range 0-127, not all characters.
+ -A man page was written.
+ -A proper user guide was started. Chapter 1: Specifying Ragel Programs
+ was written. It even has some diagrams :)
+
+Ragel 1.3.0 - Sept 4, 2002
+==========================
+ -NULL keyword no longer used in table output.
+ -Though not yet in use, underlying graph structure changed to support range
+ transitions. As a result, most of the code that walks transition lists is now
+ implemented with an iterator that hides the complexity of the transition
+ lists and ranges. Range transitions will be used to implement /[a-z]/ style
+ machines and machines made with the .. operator. Previously a single
+ transition would be used for each char in the range, which is very costly.
+ Ranges eliminate much of the space complexity and allow for the .. operator
+ to be used with very large (integer) alphabets.
+ -New minimization similar to Hopcroft's alg. It does not require n^2 space and
+ runs close to O(n*log(n)) (an exact analysis of the alg is very hard). It is
+ much better than the stable and approx minimization and obsoletes them both.
+ An exact implementation of Hopcroft's alg is desirable but not possible
+ because the ragel implementation does not assume a finite alphabet, which
+ Hopcroft's requires. Ragel will support arbitrary integer alphabets which
+ must be treated as an infinite set for implementation considerations.
+ -New option -m using above described minimization to replace all previous
+ minimization options. Old options still work but are obsolete and not
+ advertised with -h.
+ -Bug fixed in goto style output. The error exit set the current state to 0,
+ which is actually a valid state. If the machine was entered again it would go
+ into the first state, very wrong. If the first state happened to be final then
+ an immediate finish would accept when in fact it should fail.
+ -Slightly better fsm minimization now capable due to clearing of the
+ transition ordering numbers just prior to minimization.
+
+Ragel 1.2.2 - May 25, 2002
+==========================
+ -Configuration option --prefix now works when installing.
+ -cc file extension changed to cpp for better portability.
+ -Unlink of output file upon error no longer happens, removes dependency on
+ unlink system command.
+ -All multiline strings removed: not standard c++.
+ -Awk build dependency removed.
+ -MSVC 6.0 added to the list of supported compilers (with some tweaking of
+ bison and flex output).
+
+Ragel 1.2.1 - May 13, 2002
+==========================
+ -Automatic dependencies were fixed, they were not working correctly.
+ -Updated AUTHORS file to reflect contributors.
+ -Code is more C++ standards compliant: compiles with g++ 3.0
+ -Fixed bugs that only showed up in g++ 3.0
+ -Latest (unreleased) Aapl.
+ -Configuration script bails out if bison++ is installed. Ragel will not
+ compile with bison++ because it is coded in c++ and bison++ automatically
+ generates a c++ parser. Ragel uses a c-style bison parser.
+
+Ragel 1.2.0 - May 3, 2002
+=========================
+ -Underlying graph structure now supports default transitions. The result is
+ that a transition does not need to be made for each char of the alphabet
+ when making 'extend' or '/./' machines. Ragel compiles machines that
+ use the aforementioned primitives WAY faster.
+ -The ugly hacks needed to pick default transitions now go away due to
+ the graph supporting default transitions directly.
+ -If -e is given, but minimization is not turned on, print a warning.
+ -Makefiles use automatic dependencies.
+
+Ragel 1.1.0 - April 15, 2002
+============================
+ -Added goto fsm: much faster than any other fsm style.
+ -Default operator (if two machines are side by side with no operator
+ between them) is concatenation. First showed up in 1.0.4.
+ -The fsm machine no longer automatically builds the flat table for
+ transition indices. Instead it keeps the key,ptr pair. In tabcodegen
+ the flat table is produced. This way very large alphabets with sparse
+ transitions will not consume large amounts of mem. This is also in prep
+ for fsm graph getting a default transition.
+ -Generated code contains a statement explicitly stating that ragel fsms
+ are NOT covered by the GPL. Technically, Ragel copies part of itself
+ to the output to make the generic fsm execution routine (for table driven
+ fsms only) and so the output could be considered under the GPL. But this
+ code is very trivial and could easily be rewritten. The actual fsm data
+ is subject to the copyright of the source. To promote the use of Ragel,
+ a special exception is made for the part of the output copied from Ragel:
+ it may be used without restriction.
+ -Much more elegant code generation scheme is employed. Code generation
+ class members need only put the 'codegen' keyword after their 'void' type
+ in order to be automatically registered to handle macros of the same name.
+ An awk script recognises this keyword and generates an appropriate driver.
+ -Ragel gets a test suite.
+ -Postfunc and prefunc go away because they are not supported by non
+ loop-driven fsms (goto, switch) and present duplicate functionality.
+ Universal funcs can be implemented by using $ operator.
+ -Automatic dependencies used in build system, no more make depend target.
+ -Code generation section in docs.
+ -Uses the latest aapl.
+
+Ragel 1.0.5 - March 3, 2002
+===========================
+ -Bugfix in SetErrorState that caused an assertion failure when compiling
+ simple machines that did not have full transition tables (and thus did
+ not show up on any example machines). Assertion failure did not occur
+ when using the switch statement code as ragel does not call SetErrorState
+ in that case.
+ -Fixed some missing includes, now compiles on redhat.
+ -Moved the FsmMachTrans Compare class out of FsmMachTrans. Some compilers
+ don't deal with nested classes in templates too well.
+ -Removed old unused BASEREF in fsmgraph and ragel now compiles using
+ egcs-2.91.66 and presumably SUNWspro. The baseref is no longer needed
+ because states do not support being elements in multiple lists. I would
+ rather be able to support more compilers than have this feature.
+ -Started a README with compilation notes. Started an AUTHORS file.
+ -Started the user documentation. Describes basic machines and operators.
+
+Ragel 1.0.4 - March 1, 2002
+===========================
+ -Ported to the version of Aapl just after 2.2.0 release. See
+ http://www.ragel.ca/aapl/ for details on aapl.
+ -Fixed a bug in the clang example: the newline machine was not starred.
+ -Added explanations to the clang and mailbox examples. This should
+ help people that want to learn the language as the manual is far from
+ complete.
+
+Ragel 1.0.3 - Feb 2, 2002
+=========================
+ -Added aapl to the ragel tree. No longer requires you to download
+ and build aapl separately. Should avoid discouraging impatient users
+ from compiling ragel.
+ -Added the examples to the ragel tree.
+ -Added configure script checks for bison and flex.
+ -Fixed makefile so as not to die with newer versions of bison that
+ write the header of the parser to a .hh file.
+ -Started ChangeLog file.
+
+Ragel 1.0.2 - Jan 30, 2002
+==========================
+ -Bug fix in calculating highIndex for table based code. Was using
+ the length of out transition table rather than the value at the
+ end.
+ -If high/low index are at the limits, output a define in their place,
+ not the high/low values themselves so as not to cause compiler warnings.
+ -If the resulting machines don't have any indices or functions, then
+ omit the empty unreferenced static arrays so as not to cause compiler
+ warnings about unused static vars.
+ -Fixed variable sized indices support. The header cannot have any
+ reference to INDEX_TYPE as that info is not known at the time the header
+ data is written. Forces us to use a void * for pointers to indices. In
+ the c++ versions we are forced to make much of the data non-member
+ static data in the code portion for the same reason.
+
+Ragel 1.0.1 - Jan 28, 2002
+==========================
+ -Exe name change from reglang to ragel.
+ -Added ftabcodegen output code style which uses a table for states and
+ transitions but uses a switch statement for the function execution.
+ -Reformatted options in usage dump to look better.
+ -Support escape sequences in [] sections of regular expressions.
+
+Ragel 1.0 - Jan 25, 2002
+========================
+ -Initial release.
diff --git a/Makefile.in b/Makefile.in
new file mode 100644
index 0000000..9b16e8e
--- /dev/null
+++ b/Makefile.in
@@ -0,0 +1,56 @@
+#
+# Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+#
+
+# This file is part of Ragel.
+#
+# Ragel is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Ragel is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ragel; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+SUBDIRS = common ragel rlcodegen test examples doc
+
+#*************************************
+
+# Programs
+CXX = @CXX@
+
+# Get the version info.
+include version.mk
+
+# Rules.
+all:
+ @cd common && $(MAKE) && cd ../ragel && $(MAKE) && cd ../rlcodegen && $(MAKE)
+
+new-version:
+ sed 's/^\(Version:[[:space:]]*\)[0-9.]*$$/\1$(VERSION)/' ragel.spec > spec-new
+ cat spec-new > ragel.spec && rm spec-new
+
+distclean: distclean-rec distclean-local
+
+distclean-rec:
+ for dir in $(SUBDIRS); do cd $$dir; $(MAKE) distclean; cd ..; done
+
+distclean-local: clean-local
+ rm -f Makefile config.cache config.status config.log
+
+clean: clean-rec clean-local
+
+clean-rec:
+ for dir in $(SUBDIRS); do cd $$dir; $(MAKE) clean; cd ..; done
+
+clean-local:
+ rm -f tags
+
+install:
+ @cd ragel && $(MAKE) install && cd ../rlcodegen && $(MAKE) install
diff --git a/README b/README
new file mode 100644
index 0000000..f4a5817
--- /dev/null
+++ b/README
@@ -0,0 +1,54 @@
+
+ Ragel State Machine Compiler -- README
+ ======================================
+
+1. Build Requirements
+---------------------
+
+ * GNU Make
+ * g++
+
+If you would like to modify Ragel and need to build Ragel's scanners and
+parsers from the specifications then set BUILD_PARSERS=true in the configure
+script and then run it. To build the parsers you will need the following
+programs:
+
+ * flex
+ * bison (recent version and not bison++, see below)
+ * gperf
+
+To build the user guide the following extra programs are needed:
+
+ * fig2dev
+ * pdflatex
+
+
+2. Compilation
+--------------
+
+To configure type './configure'. The makefiles honour the --prefix option to
+specify where the program is to be installed to.
+
+To build the ragel program type 'make'.
+
+To build all the documentation cd to 'doc' and type 'make'. If you don't have
+all of the programs to build the user guide and just want the man page use
+'make ragel.1 rlcodegen.1'.
+
+
+3. Installing
+-------------
+
+The command 'make install' will build the programs and install them to $PREFIX/bin/.
+A 'make install' in the doc directory will make and install all the
+documentation. The man pages install to $PREFIX/man/man1/ and the user guide
+and ChangeLog install to $PREFIX/share/doc/ragel/. To install just the man page
+use 'make man-install'.
+
+
+4. Why Ragel cannot be built with Bison++
+-----------------------------------------
+Ragel is written in C++ using a C-style parser. Bison++ sees that we are using
+C++ and generates classes, which breaks the build. As of last investigation,
+this can't be stopped. Bison++ is therefore only compatible with Bison if you
+are implementing a C-style parser in C.
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..baf5c05
--- /dev/null
+++ b/TODO
@@ -0,0 +1,48 @@
+fbreak should advance the current char. Deprecate fbreak and add
+ fctl_break;
+ fctl_return <expr>;
+ fctl_goto <label>;
+
+It should be possible to import/export definitions.
+
+If a scanner can be optimized into a pure state machine, maybe permit it to be
+referenced as a machine definition. Alternately: inline scanners with an
+explicit exit pattern.
+
+The split codegen needs a profiler connected to a graph partitioning algorithm.
+
+Die a graceful death when rlcodegen -F receives large alphabets.
+
+It's not currently possible to have more than one machine in a single function
+because of label conflicts. Labels should have a unique prefix.
+
+Emit a warning when a subtraction has no effect.
+
+Emit a warning when unnamed priorities are used in longest match machines.
+These priorities may unexpectedly interact across longest-match items. Changing
+the language such that unwanted interaction cannot happen would require naming
+longest-match items.
+
+Testing facilities: Quick easy way to query which strings are accepted.
+Enumerate all accepted strings. From Nicholas Maxwell Lester.
+
+Add more examples, add more tests and write more documentation.
+
+A debugger would be nice. Ragel could emit a special debug version that
+prompted for debug commands that allowed the user to step through the machine
+and get details about where they are in their RL.
+
+Frontend should allow the redefinition of fsm section delimiters.
+
+Do more to obscure ragel's private variables. Just a leading underscore is not
+enough. Maybe something more like __ri__.
+
+Some talk about capturing data:
+
+Separate tokstart/tokend from the backtracking. One var for preservation,
+called preserve. Write declarations; produces the necessary variables used by
+ragel. Move pattern start pattern end concepts into the general? The
+variables which may need to influence the preserve is dependent on the state.
+States have a concept of which variables are in use. Can be used for length
+restrictions. If there is an exit pattern, it is the explicit way out,
+otherwise the start state and all final states are a way out.
diff --git a/aapl/COPYING b/aapl/COPYING
new file mode 100644
index 0000000..c6ed510
--- /dev/null
+++ b/aapl/COPYING
@@ -0,0 +1,502 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard. To achieve this, non-free programs must be
+allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded. In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library. It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the library's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
diff --git a/aapl/README b/aapl/README
new file mode 100644
index 0000000..a2fa5e6
--- /dev/null
+++ b/aapl/README
@@ -0,0 +1,6 @@
+This directory contains the Aapl source distribution. For the
+documentation, build scripts, test programs, ChangeLog, etc. get the
+aapldev package.
+
+AaplDev and other information about Aapl is available from
+http://www.elude.ca/aapl/
diff --git a/aapl/avlbasic.h b/aapl/avlbasic.h
new file mode 100644
index 0000000..780ef07
--- /dev/null
+++ b/aapl/avlbasic.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_AVLBASIC_H
+#define _AAPL_AVLBASIC_H
+
+#include "compare.h"
+
+/**
+ * \addtogroup avltree
+ * @{
+ */
+
+/**
+ * \class AvlBasic
+ * \brief AVL Tree in which the entire element structure is the key.
+ *
+ * AvlBasic is an AVL tree that does not distinguish between the element that
+ * it contains and the key. The entire element structure is the key that is
+ * used to compare the relative ordering of elements. This is similar to the
+ * BstSet structure.
+ *
+ * AvlBasic does not assume ownership of elements in the tree. Items must be
+ * explicitly de-allocated.
+ */
+
+/*@}*/
+
+#define BASE_EL(name) name /* identity: tree links are plain members of Element */
+#define BASEKEY(name) name /* identity: expands to its argument */
+#define AVLMEL_CLASSDEF class Element, class Compare /* template parameters of the class */
+#define AVLMEL_TEMPDEF class Element, class Compare /* template parameters of member definitions */
+#define AVLMEL_TEMPUSE Element, Compare /* matching template argument list */
+#define AvlTree AvlBasic /* the class defined by avlcommon.h is named AvlBasic */
+#define AVL_BASIC /* selects the code that compares whole elements */
+
+#include "avlcommon.h"
+
+#undef BASE_EL /* none of the settings above outlive this header */
+#undef BASEKEY
+#undef AVLMEL_CLASSDEF
+#undef AVLMEL_TEMPDEF
+#undef AVLMEL_TEMPUSE
+#undef AvlTree
+#undef AVL_BASIC
+
+#endif /* _AAPL_AVLBASIC_H */
diff --git a/aapl/avlcommon.h b/aapl/avlcommon.h
new file mode 100644
index 0000000..1984531
--- /dev/null
+++ b/aapl/avlcommon.h
@@ -0,0 +1,1622 @@
+/*
+ * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* This header is not wrapped in ifndef because it is not intended to
+ * be included by the user. */
+
+#include <assert.h>
+
+#ifdef AAPL_NAMESPACE
+namespace Aapl {
+#endif
+
+#ifdef WALKABLE
+/* This is used by AvlTree, AvlMel and AvlMelKey so it
+ * must be protected by global ifdefs. */
+#ifndef __AAPL_AVLI_EL__
+#define __AAPL_AVLI_EL__
+
+/**
+ * \brief Tree element properties for linked AVL trees.
+ *
+ * AvliTreeEl needs to be inherited by classes that are intended to be elements
+ * in an AvliTree. This struct has no constructor: it initializes no member.
+ */
+template<class SubClassEl> struct AvliTreeEl
+{
+ /**
+ * \brief Tree pointers connecting elements in a tree.
+ */
+ SubClassEl *left, *right, *parent;
+
+ /**
+ * \brief Linked list pointers to the previous and next elements.
+ */
+ SubClassEl *prev, *next;
+
+ /**
+ * \brief Height of the tree rooted at this element.
+ *
+ * Height is required by the AVL balancing algorithm.
+ */
+ long height;
+};
+#endif /* __AAPL_AVLI_EL__ */
+
+#else /* not WALKABLE */
+
+/* This is used by All the non walkable trees so it must be
+ * protected by a global ifdef. */
+#ifndef __AAPL_AVL_EL__
+#define __AAPL_AVL_EL__
+/**
+ * \brief Tree element properties for AVL trees without list pointers.
+ *
+ * AvlTreeEl needs to be inherited by classes that are intended to be elements
+ * in an AvlTree. This struct has no constructor: it initializes no member.
+ */
+template<class SubClassEl> struct AvlTreeEl
+{
+ /**
+ * \brief Tree pointers connecting elements in a tree.
+ */
+ SubClassEl *left, *right, *parent;
+
+ /**
+ * \brief Height of the tree rooted at this element.
+ *
+ * Height is required by the AVL balancing algorithm.
+ */
+ long height;
+};
+#endif /* __AAPL_AVL_EL__ */
+#endif /* def WALKABLE */
+
+
+#if defined( AVLTREE_MAP )
+
+#ifdef WALKABLE
+
+/**
+ * \brief Tree element for AvliMap
+ *
+ * Holds a key and its value on top of the AvliTreeEl tree and list pointers.
+ */
+template <class Key, class Value> struct AvliMapEl
+    : public AvliTreeEl< AvliMapEl<Key, Value> >
+{
+    /** \brief Construct from a key alone; value is default-initialized. */
+    AvliMapEl( const Key &k ) : key(k) { }
+    /** \brief Construct from a key and the value stored with it. */
+    AvliMapEl( const Key &k, const Value &v ) : key(k), value(v) { }
+
+    /** \brief The key. */
+    Key key;
+    /** \brief The value. */
+    Value value;
+
+    /** \brief Read-only access to the key. */
+    const Key &getKey() const { return key; }
+};
+#else /* not WALKABLE */
+
+/**
+ * \brief Tree element for AvlMap
+ *
+ * Holds a key and its value on top of the AvlTreeEl tree pointers.
+ */
+template <class Key, class Value> struct AvlMapEl
+    : public AvlTreeEl< AvlMapEl<Key, Value> >
+{
+    /** \brief Construct from a key alone; value is default-initialized. */
+    AvlMapEl( const Key &k ) : key(k) { }
+    /** \brief Construct from a key and the value stored with it. */
+    AvlMapEl( const Key &k, const Value &v ) : key(k), value(v) { }
+
+    /** \brief The key. */
+    Key key;
+    /** \brief The value. */
+    Value value;
+
+    /** \brief Read-only access to the key. */
+    const Key &getKey() const { return key; }
+};
+#endif /* def WALKABLE */
+
+#elif defined( AVLTREE_SET )
+
+#ifdef WALKABLE
+/**
+ * \brief Tree element for AvliSet
+ *
+ * Holds just the key on top of the AvliTreeEl tree and list pointers.
+ */
+template <class Key> struct AvliSetEl
+    : public AvliTreeEl< AvliSetEl<Key> >
+{
+    AvliSetEl( const Key &k ) : key(k) { }
+
+    /** \brief The key. */
+    Key key;
+    /** \brief Read-only access to the key. */
+    const Key &getKey() const { return key; }
+};
+#else /* not WALKABLE */
+/**
+ * \brief Tree element for AvlSet
+ *
+ * Holds just the key on top of the AvlTreeEl tree pointers.
+ */
+template <class Key> struct AvlSetEl
+    : public AvlTreeEl< AvlSetEl<Key> >
+{
+    AvlSetEl( const Key &k ) : key(k) { }
+
+    /** \brief The key. */
+    Key key;
+    /** \brief Read-only access to the key. */
+    const Key &getKey() const { return key; }
+};
+#endif /* def WALKABLE */
+
+#endif /* AVLTREE_SET */
+
+/* Common AvlTree Class */
+template < AVLMEL_CLASSDEF > class AvlTree
+#if !defined( AVL_KEYLESS ) && defined ( WALKABLE )
+ : public Compare, public BASELIST
+#elif !defined( AVL_KEYLESS )
+ : public Compare
+#elif defined( WALKABLE )
+ : public BASELIST
+#endif
+{
+public:
+ /**
+ * \brief Create an empty tree.
+ */
+#ifdef WALKABLE
+ AvlTree() : root(0), treeSize(0) { }
+#else
+ AvlTree() : root(0), head(0), tail(0), treeSize(0) { }
+#endif
+
+ /**
+ * \brief Perform a deep copy of the tree.
+ *
+ * Each element is duplicated for the new tree. Copy constructors are used
+ * to create the new elements.
+ */
+ AvlTree(const AvlTree &other);
+
+#if defined( AVLTREE_MAP ) || defined( AVLTREE_SET )
+ /**
+ * \brief Clear the contents of the tree.
+ *
+ * All element are deleted.
+ */
+ ~AvlTree() { empty(); }
+
+ /**
+ * \brief Perform a deep copy of the tree.
+ *
+ * Each element is duplicated for the new tree. Copy constructors are used
+ * to create the new element. If this tree contains items, they are first
+ * deleted.
+ *
+ * \returns A reference to this.
+ */
+ AvlTree &operator=( const AvlTree &tree );
+
+ /**
+ * \brief Transfer the elements of another tree into this.
+ *
+ * First deletes all elements in this tree.
+ */
+ void transfer( AvlTree &tree );
+#else
+ /**
+ * \brief Abandon all elements in the tree.
+ *
+ * Tree elements are not deleted.
+ */
+ ~AvlTree() {}
+
+ /**
+ * \brief Perform a deep copy of the tree.
+ *
+ * Each element is duplicated for the new tree. Copy constructors are used
+ * to create the new element. If this tree contains items, they are
+ * abandoned.
+ *
+ * \returns A reference to this.
+ */
+ AvlTree &operator=( const AvlTree &tree );
+
+ /**
+ * \brief Transfer the elements of another tree into this.
+ *
+ * All elements in this tree are abandoned first.
+ */
+ void transfer( AvlTree &tree );
+#endif
+
+#ifndef AVL_KEYLESS
+ /* Insert a element into the tree. */
+ Element *insert( Element *element, Element **lastFound = 0 );
+
+#ifdef AVL_BASIC
+ /* Find an element in the tree. Returns the element if
+ * it exists, null otherwise. */
+ Element *find( const Element *element ) const;
+
+#else
+ Element *insert( const Key &key, Element **lastFound = 0 );
+
+#ifdef AVLTREE_MAP
+ Element *insert( const Key &key, const Value &val,
+ Element **lastFound = 0 );
+#endif
+
+ /* Find an element in the tree. Returns the element if
+ * key exists, null otherwise. */
+ Element *find( const Key &key ) const;
+
+ /* Detach a element from the tree. */
+ Element *detach( const Key &key );
+
+ /* Detach and delete a element from the tree. */
+ bool remove( const Key &key );
+#endif /* AVL_BASIC */
+#endif /* AVL_KEYLESS */
+
+ /* Detach a element from the tree. */
+ Element *detach( Element *element );
+
+ /* Detach and delete a element from the tree. */
+ void remove( Element *element );
+
+ /* Free all memory used by tree. */
+ void empty();
+
+ /* Abandon all element in the tree. Does not delete element. */
+ void abandon();
+
+ /** Root element of the tree. */
+ Element *root;
+
+#ifndef WALKABLE
+ Element *head, *tail;
+#endif
+
+ /** The number of element in the tree. */
+ long treeSize;
+
+ /** \brief Return the number of elements in the tree. */
+ long length() const { return treeSize; }
+
+ /** \brief Return the number of elements in the tree. */
+ long size() const { return treeSize; }
+
+ /* Various classes for setting the iterator */
+ struct Iter;
+ struct IterFirst { IterFirst( const AvlTree &t ) : t(t) { } const AvlTree &t; };
+ struct IterLast { IterLast( const AvlTree &t ) : t(t) { } const AvlTree &t; };
+ struct IterNext { IterNext( const Iter &i ) : i(i) { } const Iter &i; };
+ struct IterPrev { IterPrev( const Iter &i ) : i(i) { } const Iter &i; };
+
+#ifdef WALKABLE
+ /**
+ * \brief Avl Tree Iterator.
+ * \ingroup iterators
+ */
+ struct Iter
+ {
+ /* Default construct. The iterator points at no element. */
+ Iter() : ptr(0) { }
+
+ /* Construct from an avl tree (starts at its head) and iterator-setting classes. */
+ Iter( const AvlTree &t ) : ptr(t.head) { }
+ Iter( const IterFirst &af ) : ptr(af.t.head) { }
+ Iter( const IterLast &al ) : ptr(al.t.tail) { }
+ Iter( const IterNext &an ) : ptr(findNext(an.i.ptr)) { }
+ Iter( const IterPrev &ap ) : ptr(findPrev(ap.i.ptr)) { }
+
+ /* Assign from a tree (restarts at its head) and iterator-setting classes. */
+ Iter &operator=( const AvlTree &tree ) { ptr = tree.head; return *this; }
+ Iter &operator=( const IterFirst &af ) { ptr = af.t.head; return *this; }
+ Iter &operator=( const IterLast &al ) { ptr = al.t.tail; return *this; }
+ Iter &operator=( const IterNext &an ) { ptr = findNext(an.i.ptr); return *this; }
+ Iter &operator=( const IterPrev &ap ) { ptr = findPrev(ap.i.ptr); return *this; }
+
+ /** \brief Less than end? True while the iterator points at an element. */
+ bool lte() const { return ptr != 0; }
+
+ /** \brief At end? True when the iterator points at no element. */
+ bool end() const { return ptr == 0; }
+
+ /** \brief Greater than beginning? Same test as lte(): ptr is non-null. */
+ bool gtb() const { return ptr != 0; }
+
+ /** \brief At beginning? Same test as end(): ptr is null. */
+ bool beg() const { return ptr == 0; }
+
+ /** \brief At first element? True when ptr is non-null and has no prev. */
+ bool first() const { return ptr && ptr->BASE_EL(prev) == 0; }
+
+ /** \brief At last element? True when ptr is non-null and has no next. */
+ bool last() const { return ptr && ptr->BASE_EL(next) == 0; }
+
+ /** \brief Implicit cast to Element*. */
+ operator Element*() const { return ptr; }
+
+ /** \brief Dereference operator returns Element&. */
+ Element &operator *() const { return *ptr; }
+
+ /** \brief Arrow operator returns Element*. */
+ Element *operator->() const { return ptr; }
+
+ /** \brief Move to next item. Returns the new position. */
+ inline Element *operator++();
+
+ /** \brief Move to next item. Returns the position held before the move. */
+ inline Element *operator++(int);
+
+ /** \brief Move to next item. Returns the new position. */
+ inline Element *increment();
+
+ /** \brief Move to previous item. Returns the new position. */
+ inline Element *operator--();
+
+ /** \brief Move to previous item. Returns the position held before the move. */
+ inline Element *operator--(int);
+
+ /** \brief Move to previous item. Returns the new position. */
+ inline Element *decrement();
+
+ /** \brief Return the next item as an IterNext. Does not modify this. */
+ IterNext next() const { return IterNext( *this ); }
+
+ /** \brief Return the previous item as an IterPrev. Does not modify this. */
+ IterPrev prev() const { return IterPrev( *this ); }
+
+ private: /* list-link lookups; element is dereferenced without a null check */
+ static Element *findPrev( Element *element ) { return element->BASE_EL(prev); }
+ static Element *findNext( Element *element ) { return element->BASE_EL(next); }
+
+ public:
+
+ /** \brief The iterator is simply a pointer. */
+ Element *ptr;
+ };
+
+#else
+
+ /**
+ * \brief Avl Tree Iterator.
+ * \ingroup iterators
+ */
+ struct Iter
+ {
+ /* Default construct. The iterator points at no element and no tree. */
+ Iter() : ptr(0), tree(0) { }
+
+ /* Construct from a tree (starts at its head) and iterator-setting classes. */
+ Iter( const AvlTree &t ) : ptr(t.head), tree(&t) { }
+ Iter( const IterFirst &af ) : ptr(af.t.head), tree(&af.t) { }
+ Iter( const IterLast &al ) : ptr(al.t.tail), tree(&al.t) { }
+ Iter( const IterNext &an ) : ptr(findNext(an.i.ptr)), tree(an.i.tree) { }
+ Iter( const IterPrev &ap ) : ptr(findPrev(ap.i.ptr)), tree(ap.i.tree) { }
+
+ /* Assign from a tree (restarts at its head) and iterator-setting classes. */
+ Iter &operator=( const AvlTree &t )
+ { ptr = t.head; tree = &t; return *this; }
+ Iter &operator=( const IterFirst &af )
+ { ptr = af.t.head; tree = &af.t; return *this; }
+ Iter &operator=( const IterLast &al )
+ { ptr = al.t.tail; tree = &al.t; return *this; }
+ Iter &operator=( const IterNext &an )
+ { ptr = findNext(an.i.ptr); tree = an.i.tree; return *this; }
+ Iter &operator=( const IterPrev &ap )
+ { ptr = findPrev(ap.i.ptr); tree = ap.i.tree; return *this; }
+
+ /** \brief Less than end? True while the iterator points at an element. */
+ bool lte() const { return ptr != 0; }
+
+ /** \brief At end? True when the iterator points at no element. */
+ bool end() const { return ptr == 0; }
+
+ /** \brief Greater than beginning? Same test as lte(): ptr is non-null. */
+ bool gtb() const { return ptr != 0; }
+
+ /** \brief At beginning? Same test as end(): ptr is null. */
+ bool beg() const { return ptr == 0; }
+
+ /** \brief At first element? True when ptr is non-null and is the tree's head. */
+ bool first() const { return ptr && ptr == tree->head; }
+
+ /** \brief At last element? True when ptr is non-null and is the tree's tail. */
+ bool last() const { return ptr && ptr == tree->tail; }
+
+ /** \brief Implicit cast to Element*. */
+ operator Element*() const { return ptr; }
+
+ /** \brief Dereference operator returns Element&. */
+ Element &operator *() const { return *ptr; }
+
+ /** \brief Arrow operator returns Element*. */
+ Element *operator->() const { return ptr; }
+
+ /** \brief Move to next item. Returns the new position. */
+ inline Element *operator++();
+
+ /** \brief Move to next item. Returns the position held before the move. */
+ inline Element *operator++(int);
+
+ /** \brief Move to next item. Returns the new position. */
+ inline Element *increment();
+
+ /** \brief Move to previous item. Returns the new position. */
+ inline Element *operator--();
+
+ /** \brief Move to previous item. Returns the position held before the move. */
+ inline Element *operator--(int);
+
+ /** \brief Move to previous item. Returns the new position. */
+ inline Element *decrement();
+
+ /** \brief Return the next item as an IterNext. Does not modify this. */
+ IterNext next() const { return IterNext( *this ); }
+
+ /** \brief Return the previous item as an IterPrev. Does not modify this. */
+ IterPrev prev() const { return IterPrev( *this ); }
+
+ private: /* tree walks to the neighbouring element, defined out of line */
+ static Element *findPrev( Element *element );
+ static Element *findNext( Element *element );
+
+ public:
+ /** \brief The iterator is simply a pointer. */
+ Element *ptr;
+
+ /* The list is not walkable so we need to keep a pointer to the tree
+ * so we can test against head and tail in O(1) time. */
+ const AvlTree *tree;
+ };
+#endif
+
+ /** \brief Return an IterFirst that sets an Iter to the first element. Const, as IterFirst only keeps a const reference. */
+ IterFirst first() const { return IterFirst( *this ); }
+
+ /** \brief Return an IterLast that sets an Iter to the last element. Const, as IterLast only keeps a const reference. */
+ IterLast last() const { return IterLast( *this ); }
+
+protected:
+ /* Recursive worker for the copy constructor. */
+ Element *copyBranch( Element *element );
+
+ /* Recursively delete element in the tree. */
+ void deleteChildrenOf(Element *n);
+
+ /* rebalance the tree beginning at the leaf whose
+ * grandparent is unbalanced. */
+ Element *rebalance(Element *start);
+
+ /* Move up the tree from a given element, recalculating the heights. */
+ void recalcHeights(Element *start);
+
+ /* Move up the tree and find the first element whose
+ * grand-parent is unbalanced. */
+ Element *findFirstUnbalGP(Element *start);
+
+ /* Move up the tree and find the first element which is unbalanced. */
+ Element *findFirstUnbalEl(Element *start);
+
+ /* Replace a element in the tree with another element not in the tree. */
+ void replaceEl(Element *element, Element *replacement);
+
+ /* Remove a element from the tree and put another (normally a child of element)
+ * in its place. */
+ void removeEl(Element *element, Element *filler);
+
+ /* Once an insertion point is found at a leaf then do the insert. */
+ void attachRebal( Element *element, Element *parentEl, Element *lastLess );
+};
+
+/* Copy constructor. Deep copy: each element of other is duplicated with new. */
+template <AVLMEL_TEMPDEF>
+AvlTree<AVLMEL_TEMPUSE>::AvlTree( const AvlTree<AVLMEL_TEMPUSE> &other )
+#ifdef WALKABLE
+    /* Begin with an empty list, copyBranch will fill in the details for us. */
+    : BASELIST()
+#endif
+{
+#ifndef WALKABLE
+    /* Without the list base, copyBranch sets head and tail as it copies. */
+    head = tail = 0;
+#endif
+    /* The number of elements carries over unchanged. */
+    treeSize = other.treeSize;
+
+    /* An empty source gives a null root, otherwise copy from its root down. */
+    if ( other.root == 0 )
+        root = 0;
+    else
+        root = copyBranch( other.root );
+}
+
+#if defined( AVLTREE_MAP ) || defined( AVLTREE_SET )
+
+/* Assignment does deep copy. Assigning a tree to itself leaves it untouched. */
+template <AVLMEL_TEMPDEF> AvlTree<AVLMEL_TEMPUSE> &AvlTree<AVLMEL_TEMPUSE>::
+    operator=( const AvlTree &other )
+{
+    /* Emptying this tree first would also destroy the source of the copy. */
+    if ( this == &other )
+        return *this;
+
+    /* Clear the tree, then reset the list pointers. The tree copy will fill
+     * in the list for us. */
+    empty();
+#ifdef WALKABLE
+    BASELIST::abandon();
+#else
+    head = tail = 0;
+#endif
+
+    /* Copy the entire tree. An empty source leaves a null root. */
+    treeSize = other.treeSize;
+    root = other.root != 0 ? copyBranch( other.root ) : 0;
+    return *this;
+}
+
+template <AVLMEL_TEMPDEF> void AvlTree<AVLMEL_TEMPUSE>::
+    transfer(AvlTree<AVLMEL_TEMPUSE> &other)
+{
+    /* Transferring a tree into itself must not clear it: nothing to do. */
+    if ( this == &other )
+        return;
+    /* Clear this tree, adopt other's size, root and list, then have other abandon them. */
+    empty();
+    treeSize = other.treeSize;
+    root = other.root;
+#ifdef WALKABLE
+    BASELIST::shallowCopy( other );
+#else
+    head = other.head;
+    tail = other.tail;
+#endif
+    other.abandon();
+}
+
+#else /* ! AVLTREE_MAP && ! AVLTREE_SET */
+
+/* Assignment does deep copy. Elements already in this tree are not deleted. */
+template <AVLMEL_TEMPDEF>
+AvlTree<AVLMEL_TEMPUSE> &AvlTree<AVLMEL_TEMPUSE>::operator=( const AvlTree &other )
+{
+    /* Forget the current list, copyBranch rebuilds it from the copies. */
+#ifdef WALKABLE
+    BASELIST::abandon();
+#else
+    head = tail = 0;
+#endif
+
+    /* Duplicate other's elements, or end up with a null root if it has none. */
+    treeSize = other.treeSize;
+    if ( other.root == 0 )
+        root = 0;
+    else
+        root = copyBranch( other.root );
+    return *this;
+}
+
+template <AVLMEL_TEMPDEF>
+void AvlTree<AVLMEL_TEMPUSE>::transfer( AvlTree<AVLMEL_TEMPUSE> &other )
+{
+    /* Adopt other's size, root and list as they are. Elements held by this
+     * tree beforehand are not deleted. */
+    treeSize = other.treeSize;
+    root = other.root;
+#ifdef WALKABLE
+    BASELIST::shallowCopy( other );
+#else
+    head = other.head;
+    tail = other.tail;
+#endif
+    other.abandon(); /* other stops referring to the elements we now hold */
+}
+
+#endif
+
+/*
+ * Iterator operators.
+ */
+
+/* Prefix ++: move to the next element and return the new position. */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::Iter::operator++()
+{
+    ptr = findNext( ptr );
+    return ptr;
+}
+
+/* Postfix ++: move to the next element, but return the element the
+ * iterator pointed at before moving. */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::Iter::operator++(int)
+{
+    Element *const before = ptr;
+    ptr = findNext( before );
+    return before;
+}
+
+/* increment: named form of prefix ++, returns the new position. */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::Iter::increment()
+{
+    ptr = findNext( ptr );
+    return ptr;
+}
+
+/* Prefix --: move to the previous element and return the new position. */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::Iter::operator--()
+{
+    ptr = findPrev( ptr );
+    return ptr;
+}
+
+/* Postfix --: move to the previous element, but return the element the
+ * iterator pointed at before moving. */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::Iter::operator--(int)
+{
+    Element *const before = ptr;
+    ptr = findPrev( before );
+    return before;
+}
+
+/* decrement: named form of prefix --, returns the new position. */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::Iter::decrement()
+{
+    ptr = findPrev( ptr );
+    return ptr;
+}
+
+#ifndef WALKABLE
+
+/* Move ahead one: in-order successor of element, null if there is none. */
+template <AVLMEL_TEMPDEF>
+Element *AvlTree<AVLMEL_TEMPUSE>::Iter::findNext( Element *element )
+{
+    Element *child = element->BASE_EL(right);
+    if ( child != 0 ) {
+        /* The successor is the leftmost element of the right subtree. */
+        while ( child->BASE_EL(left) != 0 )
+            child = child->BASE_EL(left);
+        return child;
+    }
+
+    /* No right subtree: climb until we step up out of a left child, that
+     * parent is the successor. Climbing past the root yields null. */
+    Element *from;
+    do {
+        from = element;
+        element = element->BASE_EL(parent);
+    } while ( element != 0 && element->BASE_EL(left) != from );
+    return element;
+}
+
+/* Move back one: in-order predecessor of element, null if there is none. */
+template <AVLMEL_TEMPDEF>
+Element *AvlTree<AVLMEL_TEMPUSE>::Iter::findPrev( Element *element )
+{
+    Element *child = element->BASE_EL(left);
+    if ( child != 0 ) {
+        /* The predecessor is the rightmost element of the left subtree. */
+        while ( child->BASE_EL(right) != 0 )
+            child = child->BASE_EL(right);
+        return child;
+    }
+
+    /* No left subtree: climb until we step up out of a right child, that
+     * parent is the predecessor. Climbing past the root yields null. */
+    Element *from;
+    do {
+        from = element;
+        element = element->BASE_EL(parent);
+    } while ( element != 0 && element->BASE_EL(right) != from );
+    return element;
+}
+
+#endif
+
+
+/* Recursive worker for tree copying. Returns the copy of the branch root. */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::
+ copyBranch( Element *element )
+{
+ /* Duplicate element. Either the base element's copy constructor or default
+ * constructor will get called. Both will suffice for initting the
+ * pointers to null when they need to be. */
+ Element *retVal = new Element(*element);
+
+ /* If the left tree is there, copy it and point the copy back at retVal. */
+ if ( retVal->BASE_EL(left) ) {
+ retVal->BASE_EL(left) = copyBranch(retVal->BASE_EL(left));
+ retVal->BASE_EL(left)->BASE_EL(parent) = retVal;
+ }
+
+#ifdef WALKABLE
+ BASELIST::addAfter( BASELIST::tail, retVal ); /* append after the left branch's copies */
+#else
+ if ( head == 0 )
+ head = retVal; /* head is set once, by the first copy to reach this point */
+ tail = retVal; /* every copy made so far precedes this one */
+#endif
+
+ /* If the right tree is there, copy it and point the copy back at retVal. */
+ if ( retVal->BASE_EL(right) ) {
+ retVal->BASE_EL(right) = copyBranch(retVal->BASE_EL(right));
+ retVal->BASE_EL(right)->BASE_EL(parent) = retVal;
+ }
+ return retVal;
+}
+
+/* Once an insertion position is found, attach an element to the tree. */
+template <AVLMEL_TEMPDEF> void AvlTree<AVLMEL_TEMPUSE>::
+ attachRebal( Element *element, Element *parentEl, Element *lastLess )
+{
+ /* Increment the number of elements in the tree. */
+ treeSize += 1;
+
+ /* Set element's parent. This is null when the tree was empty. */
+ element->BASE_EL(parent) = parentEl;
+
+ /* New element always starts as a leaf with height 1. */
+ element->BASE_EL(left) = 0;
+ element->BASE_EL(right) = 0;
+ element->BASE_EL(height) = 1;
+
+ /* Are we inserting in the tree somewhere? */
+ if ( parentEl != 0 ) {
+ /* We have a parent so we are somewhere in the tree. If the parent
+ * equals lastLess, then the last traversal in the insertion went
+ * left, otherwise it went right. */
+ if ( lastLess == parentEl ) {
+ parentEl->BASE_EL(left) = element;
+#ifdef WALKABLE
+ BASELIST::addBefore( parentEl, element );
+#endif
+ }
+ else {
+ parentEl->BASE_EL(right) = element;
+#ifdef WALKABLE
+ BASELIST::addAfter( parentEl, element );
+#endif
+ }
+
+#ifndef WALKABLE
+ /* Maintain the first pointer: a new left child of head becomes head. */
+ if ( head->BASE_EL(left) == element )
+ head = element;
+
+ /* Maintain the last pointer: a new right child of tail becomes tail. */
+ if ( tail->BASE_EL(right) == element )
+ tail = element;
+#endif
+ }
+ else {
+ /* No parent element so we are inserting the root. */
+ root = element;
+#ifdef WALKABLE
+ BASELIST::addAfter( BASELIST::tail, element );
+#else
+ head = tail = element;
+#endif
+ }
+
+
+ /* Recalculate the heights, starting from the new element's parent. */
+ recalcHeights(parentEl);
+
+ /* Find the first unbalance, looking upward from the new element. */
+ Element *ub = findFirstUnbalGP(element);
+
+ /* Rebalance only if an unbalance was found. */
+ if ( ub != 0 )
+ {
+ /* We assert that after this single rotation the
+ * tree is now properly balanced. */
+ rebalance(ub);
+ }
+}
+
+#ifndef AVL_KEYLESS
+
+/**
+ * \brief Insert an existing element into the tree.
+ *
+ * If the insert succeeds and lastFound is given then it is set to the element
+ * inserted. If the insert fails then lastFound is set to the existing element in
+ * the tree that has the same key as element. If the element's avl pointers are
+ * already in use then undefined behaviour results.
+ *
+ * \returns The element inserted upon success, null upon failure.
+ */
+template <AVLMEL_TEMPDEF>
+Element *AvlTree<AVLMEL_TEMPUSE>::insert( Element *element, Element **lastFound )
+{
+    /* lastLess remembers the last element we turned left at. attachRebal
+     * compares it with the parent to tell which side to attach on. */
+    Element *parentEl = 0, *lastLess = 0;
+    Element *curEl = root;
+
+    /* Walk down from the root until we step off the tree. */
+    while ( curEl != 0 ) {
+#ifdef AVL_BASIC
+        const long keyRelation = compare( *element, *curEl );
+#else
+        const long keyRelation = compare( element->BASEKEY(getKey()),
+                curEl->BASEKEY(getKey()) );
+#endif
+
+        if ( keyRelation == 0 ) {
+            /* Already present: report the existing element and fail. */
+            if ( lastFound != 0 )
+                *lastFound = curEl;
+            return 0;
+        }
+
+        parentEl = curEl;
+        if ( keyRelation < 0 ) {
+            /* Sorts before curEl, descend left. */
+            lastLess = curEl;
+            curEl = curEl->BASE_EL(left);
+        }
+        else {
+            /* Sorts after curEl, descend right. */
+            curEl = curEl->BASE_EL(right);
+        }
+    }
+
+    /* No equal element was found. Attach underneath the last element
+     * visited (as the root if the tree was empty) and rebalance. */
+    attachRebal( element, parentEl, lastLess );
+    if ( lastFound != 0 )
+        *lastFound = element;
+    return element;
+}
+
+#ifdef AVL_BASIC
+
+/**
+ * \brief Find a element in the tree with the given key.
+ *
+ * \returns The element if key exists, null if the key does not exist.
+ */
+template <AVLMEL_TEMPDEF>
+Element *AvlTree<AVLMEL_TEMPUSE>::find( const Element *element ) const
+{
+    /* Binary search from the root. Each step compares the whole element
+     * with the current one and moves to the matching side. */
+    for ( Element *curEl = root; curEl != 0; ) {
+        const long keyRelation = compare( *element, *curEl );
+
+        /* Equal to curEl: this is the element we are looking for. */
+        if ( keyRelation == 0 )
+            return curEl;
+
+        /* Otherwise continue in the subtree on the side element sorts to. */
+        if ( keyRelation < 0 )
+            curEl = curEl->BASE_EL(left);
+        else
+            curEl = curEl->BASE_EL(right);
+    }
+
+    /* Stepped off the bottom of the tree, so no equal element is stored. */
+    return 0;
+}
+
+#else
+
+/**
+ * \brief Insert a new element into the tree with given key.
+ *
+ * If the key is not already in the tree then a new element is made using the
+ * Element(const Key &key) constructor and the insert succeeds. If lastFound is
+ * given then it is set to the element inserted. If the insert fails then
+ * lastFound is set to the existing element in the tree that has the same key as
+ * element.
+ *
+ * \returns The new element upon success, null upon failure.
+ */
+template <AVLMEL_TEMPDEF>
+Element *AvlTree<AVLMEL_TEMPUSE>::insert( const Key &key, Element **lastFound )
+{
+    /* lastLess remembers the last element we turned left at. attachRebal
+     * compares it with the parent to tell which side to attach on. */
+    Element *parentEl = 0, *lastLess = 0;
+    Element *curEl = root;
+
+    /* Walk down from the root until we step off the tree. */
+    while ( curEl != 0 ) {
+        const long keyRelation = compare( key, curEl->BASEKEY(getKey()) );
+
+        if ( keyRelation == 0 ) {
+            /* Key already present: report its element and fail. */
+            if ( lastFound != 0 )
+                *lastFound = curEl;
+            return 0;
+        }
+
+        parentEl = curEl;
+        if ( keyRelation < 0 ) {
+            /* Sorts before curEl, descend left. */
+            lastLess = curEl;
+            curEl = curEl->BASE_EL(left);
+        }
+        else {
+            /* Sorts after curEl, descend right. */
+            curEl = curEl->BASE_EL(right);
+        }
+    }
+
+    /* The key is not in the tree. Only now is the new element created,
+     * then attached underneath the last element visited (as the root if
+     * the tree was empty) and the tree rebalanced. */
+    Element *element = new Element( key );
+    attachRebal( element, parentEl, lastLess );
+    if ( lastFound != 0 )
+        *lastFound = element;
+    return element;
+}
+
+#ifdef AVLTREE_MAP
+/**
+ * \brief Insert a new element into the tree with key and value.
+ *
+ * If the key is not already in the tree then a new element is constructed
+ * with Element( key, val ) and the insert succeeds. If lastFound is given then
+ * it is set to the element inserted. If the insert fails then lastFound is set
+ * to the existing element in the tree that has the same key. This insert
+ * routine is only compiled when AVLTREE_MAP is defined, because only the map
+ * classes know about a Value type.
+ *
+ * \returns The new element upon success, null upon failure.
+ */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::
+    insert( const Key &key, const Value &val, Element **lastFound )
+{
+    long keyRelation;
+    Element *curEl = root, *parentEl = 0;
+    Element *lastLess = 0;
+
+    while (true) {
+        if ( curEl == 0 ) {
+            /* We are at an external element and did not find the key we were
+             * looking for. Create the new element, attach it underneath the
+             * leaf and rebalance. */
+            Element *element = new Element( key, val );
+            attachRebal( element, parentEl, lastLess );
+
+            if ( lastFound != 0 )
+                *lastFound = element;
+            return element;
+        }
+
+        /* Read the key through BASEKEY(), exactly as the key-only insert()
+         * and find() do, so every key access goes through the same macro. */
+        keyRelation = compare( key, curEl->BASEKEY(getKey()) );
+
+        /* Do we go left? */
+        if ( keyRelation < 0 ) {
+            parentEl = lastLess = curEl;
+            curEl = curEl->BASE_EL(left);
+        }
+        /* Do we go right? */
+        else if ( keyRelation > 0 ) {
+            parentEl = curEl;
+            curEl = curEl->BASE_EL(right);
+        }
+        /* We have hit the target. */
+        else {
+            if ( lastFound != 0 )
+                *lastFound = curEl;
+            return 0;
+        }
+    }
+}
+#endif /* AVLTREE_MAP */
+
+
+/**
+ * \brief Look up the element in the tree that has the given key.
+ *
+ * \returns The element if the key exists, null if the key does not exist.
+ */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::
+    find( const Key &key ) const
+{
+    /* Descend from the root, steering by the sign of the comparison. */
+    for ( Element *node = root; node != 0; ) {
+        const long order = compare( key, node->BASEKEY(getKey()) );
+
+        /* Equal: we have hit the target. */
+        if ( order == 0 )
+            return node;
+
+        /* Negative goes left, positive goes right. */
+        node = order < 0 ? node->BASE_EL(left) : node->BASE_EL(right);
+    }
+
+    /* Ran off the bottom of the tree without a match. */
+    return 0;
+}
+
+
+/**
+ * \brief Find an element by key, then detach it from the tree.
+ *
+ * The element is not deleted.
+ *
+ * \returns The element detached if the key is found, otherwise null.
+ */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::
+    detach(const Key &key)
+{
+    Element *found = find( key );
+
+    /* Only unlink when the lookup succeeded. */
+    if ( found != 0 )
+        detach( found );
+
+    return found;
+}
+
+/**
+ * \brief Find an element by key, detach it from the tree and delete it.
+ *
+ * \returns True if the element was found and deleted, false otherwise.
+ */
+template <AVLMEL_TEMPDEF> bool AvlTree<AVLMEL_TEMPUSE>::
+    remove(const Key &key)
+{
+    /* Look for the key; nothing to do when it is absent. */
+    Element *found = find( key );
+    if ( found == 0 )
+        return false;
+
+    /* Found: detach the element and delete it. */
+    detach( found );
+    delete found;
+    return true;
+}
+
+#endif /* AVL_BASIC */
+#endif /* AVL_KEYLESS */
+
+
+/**
+ * \brief Detach an element from the tree and delete it.
+ *
+ * If the element is not in the tree then undefined behaviour results.
+ */
+template <AVLMEL_TEMPDEF> void AvlTree<AVLMEL_TEMPUSE>::
+    remove(Element *element)
+{
+    /* detach() hands back the element it was given, so unlink and free in
+     * one step. */
+    delete detach( element );
+}
+
+/**
+ * \brief Detach an element from the tree.
+ *
+ * The element is unlinked, a replacement (if any) takes its position, and
+ * the tree is rebalanced on the way back up. The element is not deleted. If
+ * the element is not in the tree then undefined behaviour results.
+ *
+ * \returns The element given.
+ */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::
+ detach(Element *element)
+{
+ Element *replacement, *fixfrom;
+ long lheight, rheight;
+
+#ifdef WALKABLE
+ /* Remove the element from the ordered list. */
+ BASELIST::detach( element );
+#endif
+
+ /* Update treeSize. */
+ treeSize--;
+
+ /* Find a replacement element. */
+ if (element->BASE_EL(right))
+ {
+ /* Find the leftmost element of the right subtree. */
+ replacement = element->BASE_EL(right);
+ while (replacement->BASE_EL(left))
+ replacement = replacement->BASE_EL(left);
+
+ /* If replacing the element with its child then we need to start
+ * fixing at the replacement, otherwise we start fixing at the
+ * parent of the replacement. */
+ if (replacement->BASE_EL(parent) == element)
+ fixfrom = replacement;
+ else
+ fixfrom = replacement->BASE_EL(parent);
+
+#ifndef WALKABLE
+ /* Without the ordered list, head is maintained by hand. */
+ if ( element == head )
+ head = replacement;
+#endif
+
+ /* Unlink the replacement (its right child fills the gap), then put the
+ * replacement where the element was. */
+ removeEl(replacement, replacement->BASE_EL(right));
+ replaceEl(element, replacement);
+ }
+ else if (element->BASE_EL(left))
+ {
+ /* Find the rightmost element of the left subtree. */
+ replacement = element->BASE_EL(left);
+ while (replacement->BASE_EL(right))
+ replacement = replacement->BASE_EL(right);
+
+ /* If replacing the element with its child then we need to start
+ * fixing at the replacement, otherwise we start fixing at the
+ * parent of the replacement. */
+ if (replacement->BASE_EL(parent) == element)
+ fixfrom = replacement;
+ else
+ fixfrom = replacement->BASE_EL(parent);
+
+#ifndef WALKABLE
+ /* Without the ordered list, tail is maintained by hand. */
+ if ( element == tail )
+ tail = replacement;
+#endif
+
+ /* Unlink the replacement (its left child fills the gap), then put the
+ * replacement where the element was. */
+ removeEl(replacement, replacement->BASE_EL(left));
+ replaceEl(element, replacement);
+ }
+ else
+ {
+ /* We need to start fixing at the parent of the element. */
+ fixfrom = element->BASE_EL(parent);
+
+#ifndef WALKABLE
+ if ( element == head )
+ head = element->BASE_EL(parent);
+ if ( element == tail )
+ tail = element->BASE_EL(parent);
+#endif
+
+ /* The element we are deleting is a leaf element. */
+ removeEl(element, 0);
+ }
+
+ /* If fixfrom is null it means we just deleted
+ * the root of the tree. */
+ if ( fixfrom == 0 )
+ return element;
+
+ /* Fix the heights after the deletion. */
+ recalcHeights(fixfrom);
+
+ /* Fix every unbalanced element going up in the tree. */
+ Element *ub = findFirstUnbalEl(fixfrom);
+ while ( ub )
+ {
+ /* Find the element to rebalance by moving down from the first unbalanced
+ * element 2 levels in the direction of the greatest heights. On the
+ * second move down, the heights may be equal ( but not on the first ).
+ * In which case go in the direction of the first move. */
+ lheight = ub->BASE_EL(left) ? ub->BASE_EL(left)->BASE_EL(height) : 0;
+ rheight = ub->BASE_EL(right) ? ub->BASE_EL(right)->BASE_EL(height) : 0;
+ assert( lheight != rheight );
+ if (rheight > lheight)
+ {
+ ub = ub->BASE_EL(right);
+ lheight = ub->BASE_EL(left) ?
+ ub->BASE_EL(left)->BASE_EL(height) : 0;
+ rheight = ub->BASE_EL(right) ?
+ ub->BASE_EL(right)->BASE_EL(height) : 0;
+ if (rheight > lheight)
+ ub = ub->BASE_EL(right);
+ else if (rheight < lheight)
+ ub = ub->BASE_EL(left);
+ else
+ ub = ub->BASE_EL(right);
+ }
+ else
+ {
+ ub = ub->BASE_EL(left);
+ lheight = ub->BASE_EL(left) ?
+ ub->BASE_EL(left)->BASE_EL(height) : 0;
+ rheight = ub->BASE_EL(right) ?
+ ub->BASE_EL(right)->BASE_EL(height) : 0;
+ if (rheight > lheight)
+ ub = ub->BASE_EL(right);
+ else if (rheight < lheight)
+ ub = ub->BASE_EL(left);
+ else
+ ub = ub->BASE_EL(left);
+ }
+
+
+ /* rebalance returns the parent of the subtree formed by the
+ * elements that were rebalanced (null if that subtree is now
+ * the root). We must continue upward from there rebalancing. */
+ fixfrom = rebalance(ub);
+
+ /* Find the next unbalanced element. */
+ ub = findFirstUnbalEl(fixfrom);
+ }
+
+ return element;
+}
+
+
+/**
+ * \brief Empty the tree and delete all of its elements.
+ *
+ * Resets the tree to its initial state. Does nothing if the tree has no root.
+ */
+template <AVLMEL_TEMPDEF> void AvlTree<AVLMEL_TEMPUSE>::empty()
+{
+    /* An empty tree needs no work. */
+    if ( root == 0 )
+        return;
+
+    /* Delete everything below the root, then the root itself. */
+    deleteChildrenOf( root );
+    delete root;
+
+    /* Back to the empty state. */
+    root = 0;
+    treeSize = 0;
+
+#ifdef WALKABLE
+    /* The elements are gone; have the ordered list forget them too. */
+    BASELIST::abandon();
+#endif
+}
+
+/**
+ * \brief Forget all elements in the tree.
+ *
+ * Does not delete the elements. Resets the tree to its initial state.
+ */
+template <AVLMEL_TEMPDEF> void AvlTree<AVLMEL_TEMPUSE>::abandon()
+{
+    /* Drop the tree's bookkeeping without touching the elements. */
+    treeSize = 0;
+    root = 0;
+
+#ifdef WALKABLE
+    /* Have the ordered list forget the elements as well. */
+    BASELIST::abandon();
+#endif
+}
+
+/* Recursively delete every descendant of an element. The element itself is
+ * not deleted; each of its child links is cleared once that child is freed. */
+template <AVLMEL_TEMPDEF> void AvlTree<AVLMEL_TEMPUSE>::
+    deleteChildrenOf( Element *element )
+{
+    /* Recurse left. */
+    if (element->BASE_EL(left)) {
+        deleteChildrenOf(element->BASE_EL(left));
+
+        /* Delete left element. */
+        delete element->BASE_EL(left);
+        element->BASE_EL(left) = 0;
+    }
+
+    /* Recurse right. */
+    if (element->BASE_EL(right)) {
+        deleteChildrenOf(element->BASE_EL(right));
+
+        /* Delete right element and clear the right link. Clearing the left
+         * link again here would leave the right one dangling. */
+        delete element->BASE_EL(right);
+        element->BASE_EL(right) = 0;
+    }
+}
+
+/* Rebalance from an element whose grandparent is unbalanced. Only call on an
+ * element that has a grandparent. The element (n), its parent (p) and its
+ * grandparent (gp) are relinked into a subtree rooted at b, with a as b's
+ * left child and c as b's right child, and the four subtrees t1..t4 hung
+ * beneath a and c. Returns the great grand-parent, which may be null. */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::
+ rebalance(Element *n)
+{
+ long lheight, rheight;
+ Element *a, *b, *c;
+ Element *t1, *t2, *t3, *t4;
+
+ Element *p = n->BASE_EL(parent); /* Parent (Non-NULL). */
+ Element *gp = p->BASE_EL(parent); /* Grand-parent (Non-NULL). */
+ Element *ggp = gp->BASE_EL(parent); /* Great grand-parent (may be NULL). */
+
+ if (gp->BASE_EL(right) == p)
+ {
+ /* gp
+ * \
+ * p
+ */
+ if (p->BASE_EL(right) == n)
+ {
+ /* gp
+ * \
+ * p
+ * \
+ * n
+ */
+ a = gp;
+ b = p;
+ c = n;
+ t1 = gp->BASE_EL(left);
+ t2 = p->BASE_EL(left);
+ t3 = n->BASE_EL(left);
+ t4 = n->BASE_EL(right);
+ }
+ else
+ {
+ /* gp
+ * \
+ * p
+ * /
+ * n
+ */
+ a = gp;
+ b = n;
+ c = p;
+ t1 = gp->BASE_EL(left);
+ t2 = n->BASE_EL(left);
+ t3 = n->BASE_EL(right);
+ t4 = p->BASE_EL(right);
+ }
+ }
+ else
+ {
+ /* gp
+ * /
+ * p
+ */
+ if (p->BASE_EL(right) == n)
+ {
+ /* gp
+ * /
+ * p
+ * \
+ * n
+ */
+ a = p;
+ b = n;
+ c = gp;
+ t1 = p->BASE_EL(left);
+ t2 = n->BASE_EL(left);
+ t3 = n->BASE_EL(right);
+ t4 = gp->BASE_EL(right);
+ }
+ else
+ {
+ /* gp
+ * /
+ * p
+ * /
+ * n
+ */
+ a = n;
+ b = p;
+ c = gp;
+ t1 = n->BASE_EL(left);
+ t2 = n->BASE_EL(right);
+ t3 = p->BASE_EL(right);
+ t4 = gp->BASE_EL(right);
+ }
+ }
+
+ /* Perform rotation.
+ */
+
+ /* Tie b to the great grandparent. */
+ if ( ggp == 0 )
+ root = b;
+ else if ( ggp->BASE_EL(left) == gp )
+ ggp->BASE_EL(left) = b;
+ else
+ ggp->BASE_EL(right) = b;
+ b->BASE_EL(parent) = ggp;
+
+ /* Tie a as a leftchild of b. */
+ b->BASE_EL(left) = a;
+ a->BASE_EL(parent) = b;
+
+ /* Tie c as a rightchild of b. */
+ b->BASE_EL(right) = c;
+ c->BASE_EL(parent) = b;
+
+ /* Tie t1 as a leftchild of a. */
+ a->BASE_EL(left) = t1;
+ if ( t1 != 0 ) t1->BASE_EL(parent) = a;
+
+ /* Tie t2 as a rightchild of a. */
+ a->BASE_EL(right) = t2;
+ if ( t2 != 0 ) t2->BASE_EL(parent) = a;
+
+ /* Tie t3 as a leftchild of c. */
+ c->BASE_EL(left) = t3;
+ if ( t3 != 0 ) t3->BASE_EL(parent) = c;
+
+ /* Tie t4 as a rightchild of c. */
+ c->BASE_EL(right) = t4;
+ if ( t4 != 0 ) t4->BASE_EL(parent) = c;
+
+ /* The heights of a, c and b are all recalculated manually and the
+ * great grand-parent is passed to recalcHeights() to ensure
+ * the heights are correct up the tree.
+ *
+ * Note that recalcHeights() cuts out when it comes across
+ * a height that hasn't changed.
+ */
+
+ /* Fix height of a. */
+ lheight = a->BASE_EL(left) ? a->BASE_EL(left)->BASE_EL(height) : 0;
+ rheight = a->BASE_EL(right) ? a->BASE_EL(right)->BASE_EL(height) : 0;
+ a->BASE_EL(height) = (lheight > rheight ? lheight : rheight) + 1;
+
+ /* Fix height of c. */
+ lheight = c->BASE_EL(left) ? c->BASE_EL(left)->BASE_EL(height) : 0;
+ rheight = c->BASE_EL(right) ? c->BASE_EL(right)->BASE_EL(height) : 0;
+ c->BASE_EL(height) = (lheight > rheight ? lheight : rheight) + 1;
+
+ /* Fix height of b. Its children are now a and c, both just updated. */
+ lheight = a->BASE_EL(height);
+ rheight = c->BASE_EL(height);
+ b->BASE_EL(height) = (lheight > rheight ? lheight : rheight) + 1;
+
+ /* Fix height of b's parents. */
+ recalcHeights(ggp);
+ return ggp;
+}
+
+/* Recalculates the height of element and then of each ancestor in turn,
+ * stopping at the first one whose height turns out to be unchanged. */
+template <AVLMEL_TEMPDEF> void AvlTree<AVLMEL_TEMPUSE>::
+    recalcHeights(Element *element)
+{
+    for ( ; element != 0; element = element->BASE_EL(parent) ) {
+        /* A missing child counts as height zero. */
+        const long lh = element->BASE_EL(left) ?
+                element->BASE_EL(left)->BASE_EL(height) : 0;
+        const long rh = element->BASE_EL(right) ?
+                element->BASE_EL(right)->BASE_EL(height) : 0;
+        const long updated = (lh > rh ? lh : rh) + 1;
+
+        /* If there is no change in the height, then there will be no change
+         * in any of the ancestors' heights. We can stop going up. */
+        if ( updated == element->BASE_EL(height) )
+            break;
+
+        /* There was a change: store it and continue upward. */
+        element->BASE_EL(height) = updated;
+    }
+}
+
+/* Finds the first element, walking upward from the one given, whose
+ * grandparent is unbalanced. Returns null when there is no such element. */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::
+    findFirstUnbalGP(Element *element)
+{
+    /* Don't do anything if we have no grandparent. */
+    if ( element == 0 )
+        return 0;
+    Element *up = element->BASE_EL(parent);
+    if ( up == 0 )
+        return 0;
+
+    /* Move element and its grandparent upward in lock step. */
+    for ( Element *grand = up->BASE_EL(parent); grand != 0;
+            grand = grand->BASE_EL(parent) )
+    {
+        Element *lsub = grand->BASE_EL(left);
+        Element *rsub = grand->BASE_EL(right);
+        const long lh = lsub ? lsub->BASE_EL(height) : 0;
+        const long rh = rsub ? rsub->BASE_EL(height) : 0;
+        const long skew = lh - rh;
+
+        /* Subtree heights differing by more than one means unbalanced. */
+        if ( skew < -1 || skew > 1 )
+            return element;
+
+        element = element->BASE_EL(parent);
+    }
+    return 0;
+}
+
+
+/* Finds the first unbalanced element, starting at the one given and walking
+ * up through its ancestors. Returns null when none is unbalanced. */
+template <AVLMEL_TEMPDEF> Element *AvlTree<AVLMEL_TEMPUSE>::
+    findFirstUnbalEl(Element *element)
+{
+    for ( ; element != 0; element = element->BASE_EL(parent) ) {
+        /* A missing child counts as height zero. */
+        Element *lsub = element->BASE_EL(left);
+        Element *rsub = element->BASE_EL(right);
+        const long lh = lsub ? lsub->BASE_EL(height) : 0;
+        const long rh = rsub ? rsub->BASE_EL(height) : 0;
+        const long skew = lh - rh;
+
+        /* Subtree heights differing by more than one means unbalanced. */
+        if ( skew < -1 || skew > 1 )
+            return element;
+    }
+
+    /* Reached the top without finding an unbalanced element. */
+    return 0;
+}
+
+/* Replace an element in the tree with another element not in the tree. The
+ * replacement takes over the element's children, parent link and height. */
+template <AVLMEL_TEMPDEF> void AvlTree<AVLMEL_TEMPUSE>::
+    replaceEl(Element *element, Element *replacement)
+{
+    /* Snapshot the outgoing element's links before rewiring anything. */
+    Element *above = element->BASE_EL(parent);
+    Element *lsub = element->BASE_EL(left);
+    Element *rsub = element->BASE_EL(right);
+
+    /* Adopt the left child. */
+    replacement->BASE_EL(left) = lsub;
+    if ( lsub != 0 )
+        lsub->BASE_EL(parent) = replacement;
+
+    /* Adopt the right child. */
+    replacement->BASE_EL(right) = rsub;
+    if ( rsub != 0 )
+        rsub->BASE_EL(parent) = replacement;
+
+    /* Hook into the parent, or become the root when there is none. */
+    replacement->BASE_EL(parent) = above;
+    if ( above == 0 )
+        root = replacement;
+    else if ( above->BASE_EL(left) == element )
+        above->BASE_EL(left) = replacement;
+    else
+        above->BASE_EL(right) = replacement;
+
+    /* Same position in the tree, so start from the same height. */
+    replacement->BASE_EL(height) = element->BASE_EL(height);
+}
+
+/* Removes an element from the tree and puts filler in its place.
+ * Filler should be null or a child of element. */
+template <AVLMEL_TEMPDEF> void AvlTree<AVLMEL_TEMPUSE>::
+    removeEl(Element *element, Element *filler)
+{
+    Element *above = element->BASE_EL(parent);
+
+    /* Point whatever referred to element (the root, or one of the parent's
+     * child links) at the filler instead. */
+    if ( above == 0 )
+        root = filler;
+    else if ( above->BASE_EL(left) == element )
+        above->BASE_EL(left) = filler;
+    else
+        above->BASE_EL(right) = filler;
+
+    /* Give the filler, if there is one, its new parent. */
+    if ( filler != 0 )
+        filler->BASE_EL(parent) = above;
+}
+
+#ifdef AAPL_NAMESPACE
+}
+#endif
diff --git a/aapl/avlibasic.h b/aapl/avlibasic.h
new file mode 100644
index 0000000..a48faaa
--- /dev/null
+++ b/aapl/avlibasic.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_AVLIBASIC_H
+#define _AAPL_AVLIBASIC_H
+
+#include "compare.h"
+#include "dlistmel.h"
+
+/**
+ * \addtogroup avlitree
+ * @{
+ */
+
+/**
+ * \class AvliBasic
+ * \brief Linked AVL Tree in which the entire element structure is the key.
+ *
+ * AvliBasic is a linked AVL tree that does not distinguish between the
+ * element that it contains and the key. The entire element structure is the
+ * key that is used to compare the relative ordering of elements. This is
+ * similar to the BstSet structure.
+ *
+ * AvliBasic does not assume ownership of elements in the tree. Items must be
+ * explicitly de-allocated.
+ */
+
+/*@}*/
+
+/* Parameters for avlcommon.h, which is included below to generate the
+ * AvliBasic class; every macro is undefined again afterwards.
+ *
+ * WALKABLE makes the common code call into BASELIST (BASELIST::detach() and
+ * BASELIST::abandon()), so BASELIST has to be defined whenever WALKABLE is.
+ * The definition matches the other linked trees that take the element type
+ * as a template parameter (AvliTree, AvliKeyless). */
+#define BASE_EL(name) name
+#define BASEKEY(name) name
+#define BASELIST DListMel< Element, AvliTreeEl<Element> >
+#define AVLMEL_CLASSDEF class Element, class Compare
+#define AVLMEL_TEMPDEF class Element, class Compare
+#define AVLMEL_TEMPUSE Element, Compare
+#define AvlTree AvliBasic
+#define AVL_BASIC
+#define WALKABLE
+
+#include "avlcommon.h"
+
+#undef BASE_EL
+#undef BASEKEY
+#undef BASELIST
+#undef AVLMEL_CLASSDEF
+#undef AVLMEL_TEMPDEF
+#undef AVLMEL_TEMPUSE
+#undef AvlTree
+#undef AVL_BASIC
+#undef WALKABLE
+
+#endif /* _AAPL_AVLIBASIC_H */
diff --git a/aapl/avlikeyless.h b/aapl/avlikeyless.h
new file mode 100644
index 0000000..559b75a
--- /dev/null
+++ b/aapl/avlikeyless.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2002, 2003 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_AVLIKEYLESS_H
+#define _AAPL_AVLIKEYLESS_H
+
+#include "compare.h"
+#include "dlistmel.h"
+
+/**
+ * \addtogroup avlitree
+ * @{
+ */
+
+/**
+ * \class AvliKeyless
+ * \brief Linked AVL tree that has no insert/find/remove functions that take a
+ * key.
+ *
+ * AvliKeyless is an implementation of the AVL tree rebalancing functionality
+ * only. It provides the common code for the tiny AVL tree implementations.
+ */
+
+/*@}*/
+
+/* Parameters for avlcommon.h, which is included below to generate the
+ * AvliKeyless class; every macro is undefined again afterwards so that other
+ * tree headers can supply their own values.
+ *   BASE_EL(name) - how the common code names an element's tree members.
+ *   BASELIST      - list class the common code calls into when WALKABLE is set.
+ *   AVLMEL_TEMP*  - template parameter lists for the member definitions.
+ *   AvlTree       - class name used by the definitions in avlcommon.h.
+ *   AVL_KEYLESS   - presumably leaves out the key-based insert/find/remove
+ *                   routines (confirm against the guard in avlcommon.h). */
+#define BASE_EL(name) name
+#define BASELIST DListMel< Element, AvliTreeEl<Element> >
+#define AVLMEL_CLASSDEF class Element
+#define AVLMEL_TEMPDEF class Element
+#define AVLMEL_TEMPUSE Element
+#define AvlTree AvliKeyless
+#define WALKABLE
+#define AVL_KEYLESS
+
+#include "avlcommon.h"
+
+#undef BASE_EL
+#undef BASELIST
+#undef AVLMEL_CLASSDEF
+#undef AVLMEL_TEMPDEF
+#undef AVLMEL_TEMPUSE
+#undef AvlTree
+#undef WALKABLE
+#undef AVL_KEYLESS
+
+#endif /* _AAPL_AVLIKEYLESS_H */
diff --git a/aapl/avlimap.h b/aapl/avlimap.h
new file mode 100644
index 0000000..38bfff7
--- /dev/null
+++ b/aapl/avlimap.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_AVLIMAP_H
+#define _AAPL_AVLIMAP_H
+
+#include "compare.h"
+#include "dlist.h"
+
+/**
+ * \addtogroup avlitree
+ * @{
+ */
+
+/**
+ * \class AvliMap
+ * \brief Linked key and value oriented AVL tree.
+ *
+ * AvliMap stores key and value pairs in elements that are managed by the tree. It
+ * is intended to be similar to the map template found in the STL. AvliMap
+ * requires that a Key type, a Value type, and a class containing a compare()
+ * routine for Key be given. Items can be inserted with just a key or with a
+ * key and value pair.
+ *
+ * AvliMap assumes all elements in the tree are allocated on the heap and are
+ * to be managed by the tree. This means that the class destructor will delete
+ * the contents of the tree. A deep copy will cause existing elements to be
+ * deleted first.
+ *
+ * \include ex_avlimap.cpp
+ */
+
+/*@}*/
+
+/* Parameters for avlcommon.h, which is included below to generate the
+ * AvliMap class; every macro is undefined again afterwards so that other
+ * tree headers can supply their own values.
+ *   AVLTREE_MAP   - enables the insert( key, val, lastFound ) routine.
+ *   BASE_EL(name) - how the common code names an element's tree members.
+ *   BASEKEY(name) - how the common code names an element's getKey().
+ *   BASELIST      - list class the common code calls into when WALKABLE is set.
+ *   AVLMEL_TEMP*  - template parameter lists for the member definitions.
+ *   AvlTree       - class name used by the definitions in avlcommon.h.
+ *   Element       - fixed here to AvliMapEl<Key,Value> rather than being a
+ *                   template parameter. */
+#define AVLTREE_MAP
+#define BASE_EL(name) name
+#define BASEKEY(name) name
+#define BASELIST DList< AvliMapEl<Key,Value> >
+#define AVLMEL_CLASSDEF class Key, class Value, class Compare = CmpOrd<Key>
+#define AVLMEL_TEMPDEF class Key, class Value, class Compare
+#define AVLMEL_TEMPUSE Key, Value, Compare
+#define AvlTree AvliMap
+#define Element AvliMapEl<Key,Value>
+#define WALKABLE
+
+#include "avlcommon.h"
+
+#undef AVLTREE_MAP
+#undef BASE_EL
+#undef BASEKEY
+#undef BASELIST
+#undef AVLMEL_CLASSDEF
+#undef AVLMEL_TEMPDEF
+#undef AVLMEL_TEMPUSE
+#undef AvlTree
+#undef Element
+#undef WALKABLE
+
+#endif /* _AAPL_AVLIMAP_H */
diff --git a/aapl/avlimel.h b/aapl/avlimel.h
new file mode 100644
index 0000000..9442a99
--- /dev/null
+++ b/aapl/avlimel.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_AVLIMEL_H
+#define _AAPL_AVLIMEL_H
+
+#include "compare.h"
+#include "dlistmel.h"
+
+/**
+ * \addtogroup avlitree
+ * @{
+ */
+
+/**
+ * \class AvliMel
+ * \brief Linked AVL tree for element appearing in multiple trees.
+ *
+ * AvliMel allows for an element to simultaneously be in multiple trees without
+ * the trees interfering with one another. For each tree that the element is
+ * to appear in, there must be a distinct set of AVL Tree management data that
+ * can be unambiguously referenced with some base class name. This name
+ * is passed to the tree as a template parameter and is used in the tree
+ * algorithms.
+ *
+ * The element must use the same key type and value in each tree that it
+ * appears in. If distinct keys are required, the AvliMelKey structure is
+ * available.
+ *
+ * AvliMel does not assume ownership of elements in the tree. The destructor
+ * will not delete the elements. If the user wishes to explicitly deallocate
+ * all the items in the tree the empty() routine is available.
+ *
+ * \include ex_avlimel.cpp
+ */
+
+/*@}*/
+
+/* Parameters for avlcommon.h, which is included below to generate the
+ * AvliMel class; every macro is undefined again afterwards so that other
+ * tree headers can supply their own values.
+ *   BASE_EL(name) - qualifies an element's tree members with BaseEl::, so the
+ *                   common code touches only that base's management data.
+ *   BASEKEY(name) - how the common code names an element's getKey().
+ *   BASELIST      - list class the common code calls into when WALKABLE is set.
+ *   AVLMEL_TEMP*  - template parameter lists for the member definitions.
+ *   AvlTree       - class name used by the definitions in avlcommon.h. */
+#define BASE_EL(name) BaseEl::name
+#define BASEKEY(name) name
+#define BASELIST DListMel< Element, BaseEl >
+#define AVLMEL_CLASSDEF class Element, class Key, \
+ class BaseEl, class Compare = CmpOrd<Key>
+#define AVLMEL_TEMPDEF class Element, class Key, \
+ class BaseEl, class Compare
+#define AVLMEL_TEMPUSE Element, Key, BaseEl, Compare
+#define AvlTree AvliMel
+#define WALKABLE
+
+#include "avlcommon.h"
+
+#undef BASE_EL
+#undef BASEKEY
+#undef BASELIST
+#undef AVLMEL_CLASSDEF
+#undef AVLMEL_TEMPDEF
+#undef AVLMEL_TEMPUSE
+#undef AvlTree
+#undef WALKABLE
+
+#endif /* _AAPL_AVLIMEL_H */
diff --git a/aapl/avlimelkey.h b/aapl/avlimelkey.h
new file mode 100644
index 0000000..faa56e8
--- /dev/null
+++ b/aapl/avlimelkey.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_AVLIMELKEY_H
+#define _AAPL_AVLIMELKEY_H
+
+#include "compare.h"
+#include "dlistmel.h"
+
+/**
+ * \addtogroup avlitree
+ * @{
+ */
+
+/**
+ * \class AvliMelKey
+ * \brief Linked AVL tree for element appearing in multiple trees with different keys.
+ *
+ * AvliMelKey is similar to AvliMel, except that an additional template
+ * parameter, BaseKey, is provided for resolving ambiguous references to
+ * getKey(). This means that if an element is stored in multiple trees, each
+ * tree can use a different key for ordering the elements in it. Using
+ * AvliMelKey an array of data structures can be indexed with an O(log(n))
+ * search on two or more of the values contained within it and without
+ * allocating any additional data.
+ *
+ * AvliMelKey does not assume ownership of elements in the tree. The destructor
+ * will not delete the elements. If the user wishes to explicitly deallocate
+ * all the items in the tree the empty() routine is available.
+ *
+ * \include ex_avlimelkey.cpp
+ */
+
+/*@}*/
+
+/* Parameters for avlcommon.h, which is included below to generate the
+ * AvliMelKey class; every macro is undefined again afterwards so that other
+ * tree headers can supply their own values.
+ *   BASE_EL(name) - qualifies an element's tree members with BaseEl::.
+ *   BASEKEY(name) - qualifies getKey() with BaseKey::, so each tree can order
+ *                   the same element by a different key.
+ *   BASELIST      - list class the common code calls into when WALKABLE is set.
+ *   AVLMEL_TEMP*  - template parameter lists for the member definitions.
+ *   AvlTree       - class name used by the definitions in avlcommon.h. */
+#define BASE_EL(name) BaseEl::name
+#define BASEKEY(name) BaseKey::name
+#define BASELIST DListMel< Element, BaseEl >
+#define AVLMEL_CLASSDEF class Element, class Key, class BaseEl, \
+ class BaseKey, class Compare = CmpOrd<Key>
+#define AVLMEL_TEMPDEF class Element, class Key, class BaseEl, \
+ class BaseKey, class Compare
+#define AVLMEL_TEMPUSE Element, Key, BaseEl, BaseKey, Compare
+#define AvlTree AvliMelKey
+#define WALKABLE
+
+#include "avlcommon.h"
+
+#undef BASE_EL
+#undef BASEKEY
+#undef BASELIST
+#undef AVLMEL_CLASSDEF
+#undef AVLMEL_TEMPDEF
+#undef AVLMEL_TEMPUSE
+#undef AvlTree
+#undef WALKABLE
+
+#endif /* _AAPL_AVLIMELKEY_H */
diff --git a/aapl/avliset.h b/aapl/avliset.h
new file mode 100644
index 0000000..cf5be36
--- /dev/null
+++ b/aapl/avliset.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_AVLISET_H
+#define _AAPL_AVLISET_H
+
+#include "compare.h"
+#include "dlist.h"
+
+/**
+ * \addtogroup avlitree
+ * @{
+ */
+
+/**
+ * \class AvliSet
+ * \brief Linked Key-only oriented tree.
+ *
+ * AvliSet stores only keys in elements that are managed by the tree. AvliSet
+ * requires that a Key type and a class containing a compare() routine
+ * for Key be given. Items are inserted with just a key value.
+ *
+ * AvliSet assumes all elements in the tree are allocated on the heap and are
+ * to be managed by the tree. This means that the class destructor will delete
+ * the contents of the tree. A deep copy will cause existing elements to be
+ * deleted first.
+ *
+ * \include ex_avliset.cpp
+ */
+
+/*@}*/
+
+/* Parameters for avlcommon.h, which is included below to generate the
+ * AvliSet class; every macro is undefined again afterwards so that other
+ * tree headers can supply their own values.
+ *   AVLTREE_SET   - marks the set flavour (its use is in avlcommon.h, outside
+ *                   this header).
+ *   BASE_EL(name) - how the common code names an element's tree members.
+ *   BASEKEY(name) - how the common code names an element's getKey().
+ *   BASELIST      - list class the common code calls into when WALKABLE is set.
+ *   AVLMEL_TEMP*  - template parameter lists for the member definitions.
+ *   AvlTree       - class name used by the definitions in avlcommon.h.
+ *   Element       - fixed here to AvliSetEl<Key> rather than being a template
+ *                   parameter. */
+#define AVLTREE_SET
+#define BASE_EL(name) name
+#define BASEKEY(name) name
+#define BASELIST DList< AvliSetEl<Key> >
+#define AVLMEL_CLASSDEF class Key, class Compare = CmpOrd<Key>
+#define AVLMEL_TEMPDEF class Key, class Compare
+#define AVLMEL_TEMPUSE Key, Compare
+#define AvlTree AvliSet
+#define Element AvliSetEl<Key>
+#define WALKABLE
+
+#include "avlcommon.h"
+
+#undef AVLTREE_SET
+#undef BASE_EL
+#undef BASEKEY
+#undef BASELIST
+#undef AVLMEL_CLASSDEF
+#undef AVLMEL_TEMPDEF
+#undef AVLMEL_TEMPUSE
+#undef AvlTree
+#undef Element
+#undef WALKABLE
+
+#endif /* _AAPL_AVLISET_H */
diff --git a/aapl/avlitree.h b/aapl/avlitree.h
new file mode 100644
index 0000000..b053c96
--- /dev/null
+++ b/aapl/avlitree.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_AVLITREE_H
+#define _AAPL_AVLITREE_H
+
+#include "compare.h"
+#include "dlistmel.h"
+
+/**
+ * \addtogroup avlitree
+ * @{
+ */
+
+/**
+ * \class AvliTree
+ * \brief Linked AVL tree.
+ *
+ * AvliTree is the standard linked by-structure AVL tree. To use this
+ * structure the user must define an element type and give it the necessary
+ * properties. At the very least it must have a getKey() function that will be
+ * used to compare the relative ordering of elements and tree management data
+ * necessary for the AVL algorithm. An element type can acquire the management
+ * data by inheriting the AvliTreeEl class.
+ *
+ * AvliTree does not presume to manage the allocation of elements in the tree.
+ * The destructor will not delete the items in the tree, instead the elements
+ * must be explicitly de-allocated by the user if necessary and when it is
+ * safe to do so. The empty() routine will traverse the tree and delete all
+ * items.
+ *
+ * Since the tree does not manage the elements, it can contain elements that
+ * are allocated statically or that are part of another data structure.
+ *
+ * \include ex_avlitree.cpp
+ */
+
+/*@}*/
+
+/* Parameters for avlcommon.h, which is included below to generate the
+ * AvliTree class; every macro is undefined again afterwards so that other
+ * tree headers can supply their own values.
+ *   BASE_EL(name) - how the common code names an element's tree members.
+ *   BASEKEY(name) - how the common code names an element's getKey().
+ *   BASELIST      - list class the common code calls into when WALKABLE is set.
+ *   AVLMEL_TEMP*  - template parameter lists for the member definitions.
+ *   AvlTree       - class name used by the definitions in avlcommon.h. */
+#define BASE_EL(name) name
+#define BASEKEY(name) name
+#define BASELIST DListMel< Element, AvliTreeEl<Element> >
+#define AVLMEL_CLASSDEF class Element, class Key, class Compare = CmpOrd<Key>
+#define AVLMEL_TEMPDEF class Element, class Key, class Compare
+#define AVLMEL_TEMPUSE Element, Key, Compare
+#define AvlTree AvliTree
+#define WALKABLE
+
+#include "avlcommon.h"
+
+#undef BASE_EL
+#undef BASEKEY
+#undef BASELIST
+#undef AVLMEL_CLASSDEF
+#undef AVLMEL_TEMPDEF
+#undef AVLMEL_TEMPUSE
+#undef AvlTree
+#undef WALKABLE
+
+#endif /* _AAPL_AVLITREE_H */
diff --git a/aapl/avlkeyless.h b/aapl/avlkeyless.h
new file mode 100644
index 0000000..3080513
--- /dev/null
+++ b/aapl/avlkeyless.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2002, 2003 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_AVLKEYLESS_H
+#define _AAPL_AVLKEYLESS_H
+
+#include "compare.h"
+
+/**
+ * \addtogroup avltree
+ * @{
+ */
+
+/**
+ * \class AvlKeyless
+ * \brief AVL tree that has no insert/find/remove functions that take a key.
+ *
+ * AvlKeyless is an implementation of the AVL tree rebalancing functionality
+ * only. It provides the common code for the tiny AVL tree implementations.
+ */
+
+/*@}*/
+
+#define BASE_EL(name) name
+#define AVLMEL_CLASSDEF class Element
+#define AVLMEL_TEMPDEF class Element
+#define AVLMEL_TEMPUSE Element
+#define AvlTree AvlKeyless
+#define AVL_KEYLESS /* NOTE(review): presumably switches off the key-taking insert/find/remove -- confirm in avlcommon.h */
+
+#include "avlcommon.h" /* NOTE(review): presumably generates AvlKeyless from the macros above -- confirm */
+
+#undef BASE_EL /* the configuration macros are removed again so they do not leak out of this header */
+#undef AVLMEL_CLASSDEF
+#undef AVLMEL_TEMPDEF
+#undef AVLMEL_TEMPUSE
+#undef AvlTree
+#undef AVL_KEYLESS
+
+#endif /* _AAPL_AVLKEYLESS_H */
diff --git a/aapl/avlmap.h b/aapl/avlmap.h
new file mode 100644
index 0000000..e4e1566
--- /dev/null
+++ b/aapl/avlmap.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_AVLMAP_H
+#define _AAPL_AVLMAP_H
+
+#include "compare.h"
+
+/**
+ * \addtogroup avltree
+ * @{
+ */
+
+/**
+ * \class AvlMap
+ * \brief Key and value oriented AVL tree.
+ *
+ * AvlMap stores key and value pairs in elements that are managed by the tree.
+ * It is intended to be similar to the map template found in the STL. AvlMap
+ * requires that a Key type, a Value type, and a class containing a compare()
+ * routine for Key be given. Items can be inserted with just a key or with a
+ * key and value pair.
+ *
+ * AvlMap assumes all elements in the tree are allocated on the heap and are
+ * to be managed by the tree. This means that the class destructor will delete
+ * the contents of the tree. A deep copy will cause existing elements to be
+ * deleted first.
+ *
+ * \include ex_avlmap.cpp
+ */
+
+/*@}*/
+
+#define AVLTREE_MAP
+#define BASE_EL(name) name
+#define BASEKEY(name) name
+#define AVLMEL_CLASSDEF class Key, class Value, class Compare = CmpOrd<Key>
+#define AVLMEL_TEMPDEF class Key, class Value, class Compare
+#define AVLMEL_TEMPUSE Key, Value, Compare
+#define AvlTree AvlMap
+#define Element AvlMapEl<Key,Value> /* the element type is fixed by the map rather than being a template parameter */
+
+#include "avlcommon.h" /* NOTE(review): presumably generates AvlMap from the macros above -- confirm */
+
+#undef AVLTREE_MAP /* the configuration macros are removed again so they do not leak out of this header */
+#undef BASE_EL
+#undef BASEKEY
+#undef AVLMEL_CLASSDEF
+#undef AVLMEL_TEMPDEF
+#undef AVLMEL_TEMPUSE
+#undef AvlTree
+#undef Element
+
+
+
+#endif /* _AAPL_AVLMAP_H */
diff --git a/aapl/avlmel.h b/aapl/avlmel.h
new file mode 100644
index 0000000..7bfad3b
--- /dev/null
+++ b/aapl/avlmel.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_AVLMEL_H
+#define _AAPL_AVLMEL_H
+
+#include "compare.h"
+
+/**
+ * \addtogroup avltree
+ * @{
+ */
+
+/**
+ * \class AvlMel
+ * \brief AVL tree for elements appearing in multiple trees.
+ *
+ * AvlMel allows for an element to simultaneously be in multiple trees without
+ * the trees interfering with one another. For each tree that the element is
+ * to appear in, there must be a distinct set of AVL Tree management data that
+ * can be unambiguously referenced with some base class name. This name
+ * is passed to the tree as a template parameter and is used in the tree
+ * algorithms.
+ *
+ * The element must use the same key type and value in each tree that it
+ * appears in. If distinct keys are required, the AvlMelKey structure is
+ * available.
+ *
+ * AvlMel does not assume ownership of elements in the tree. The destructor
+ * will not delete the elements. If the user wishes to explicitly deallocate
+ * all the items in the tree the empty() routine is available.
+ *
+ * \include ex_avlmel.cpp
+ */
+
+/*@}*/
+
+#define BASE_EL(name) BaseEl::name
+#define BASEKEY(name) name
+#define AVLMEL_CLASSDEF class Element, class Key, \
+ class BaseEl, class Compare = CmpOrd<Key>
+#define AVLMEL_TEMPDEF class Element, class Key, \
+ class BaseEl, class Compare
+#define AVLMEL_TEMPUSE Element, Key, BaseEl, Compare
+#define AvlTree AvlMel
+
+#include "avlcommon.h" /* NOTE(review): presumably generates AvlMel from the macros above -- confirm */
+
+#undef BASE_EL /* the configuration macros are removed again so they do not leak out of this header */
+#undef BASEKEY
+#undef AVLMEL_CLASSDEF
+#undef AVLMEL_TEMPDEF
+#undef AVLMEL_TEMPUSE
+#undef AvlTree
+
+#endif /* _AAPL_AVLMEL_H */
diff --git a/aapl/avlmelkey.h b/aapl/avlmelkey.h
new file mode 100644
index 0000000..9261cc8
--- /dev/null
+++ b/aapl/avlmelkey.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_AVLMELKEY_H
+#define _AAPL_AVLMELKEY_H
+
+#include "compare.h"
+
+/**
+ * \addtogroup avltree
+ * @{
+ */
+
+/**
+ * \class AvlMelKey
+ * \brief AVL tree for elements appearing in multiple trees with different keys.
+ *
+ * AvlMelKey is similar to AvlMel, except that an additional template
+ * parameter, BaseKey, is provided for resolving ambiguous references to
+ * getKey(). This means that if an element is stored in multiple trees, each
+ * tree can use a different key for ordering the elements in it. Using
+ * AvlMelKey an array of data structures can be indexed with an O(log(n))
+ * search on two or more of the values contained within it and without
+ * allocating any additional data.
+ *
+ * AvlMelKey does not assume ownership of elements in the tree. The destructor
+ * will not delete the elements. If the user wishes to explicitly deallocate
+ * all the items in the tree the empty() routine is available.
+ *
+ * \include ex_avlmelkey.cpp
+ */
+
+/*@}*/
+
+#define BASE_EL(name) BaseEl::name
+#define BASEKEY(name) BaseKey::name
+#define AVLMEL_CLASSDEF class Element, class Key, class BaseEl, \
+ class BaseKey, class Compare = CmpOrd<Key>
+#define AVLMEL_TEMPDEF class Element, class Key, class BaseEl, \
+ class BaseKey, class Compare
+#define AVLMEL_TEMPUSE Element, Key, BaseEl, BaseKey, Compare
+#define AvlTree AvlMelKey
+
+#include "avlcommon.h" /* NOTE(review): presumably generates AvlMelKey from the macros above -- confirm */
+
+#undef BASE_EL /* the configuration macros are removed again so they do not leak out of this header */
+#undef BASEKEY
+#undef AVLMEL_CLASSDEF
+#undef AVLMEL_TEMPDEF
+#undef AVLMEL_TEMPUSE
+#undef AvlTree
+
+#endif /* _AAPL_AVLMELKEY_H */
diff --git a/aapl/avlset.h b/aapl/avlset.h
new file mode 100644
index 0000000..224ee59
--- /dev/null
+++ b/aapl/avlset.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_AVLSET_H
+#define _AAPL_AVLSET_H
+
+#include "compare.h"
+
+/**
+ * \addtogroup avltree
+ * @{
+ */
+
+/**
+ * \class AvlSet
+ * \brief Key-only oriented tree.
+ *
+ * AvlSet stores only keys in elements that are managed by the tree. AvlSet
+ * requires that a Key type and a class containing a compare() routine
+ * for Key be given. Items are inserted with just a key value.
+ *
+ * AvlSet assumes all elements in the tree are allocated on the heap and are
+ * to be managed by the tree. This means that the class destructor will delete
+ * the contents of the tree. A deep copy will cause existing elements to be
+ * deleted first.
+ *
+ * \include ex_avlset.cpp
+ */
+
+/*@}*/
+
+#define AVLTREE_SET
+#define BASE_EL(name) name
+#define BASEKEY(name) name
+#define AVLMEL_CLASSDEF class Key, class Compare = CmpOrd<Key>
+#define AVLMEL_TEMPDEF class Key, class Compare
+#define AVLMEL_TEMPUSE Key, Compare
+#define AvlTree AvlSet
+#define Element AvlSetEl<Key> /* the element type is fixed by the set rather than being a template parameter */
+
+#include "avlcommon.h" /* NOTE(review): presumably generates AvlSet from the macros above -- confirm */
+
+#undef AVLTREE_SET /* the configuration macros are removed again so they do not leak out of this header */
+#undef BASE_EL
+#undef BASEKEY
+#undef AVLMEL_CLASSDEF
+#undef AVLMEL_TEMPDEF
+#undef AVLMEL_TEMPUSE
+#undef AvlTree
+#undef Element
+
+#endif /* _AAPL_AVLSET_H */
diff --git a/aapl/avltree.h b/aapl/avltree.h
new file mode 100644
index 0000000..cf15359
--- /dev/null
+++ b/aapl/avltree.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_AVLTREE_H
+#define _AAPL_AVLTREE_H
+
+#include "compare.h"
+
+/**
+ * \addtogroup avltree
+ * @{
+ */
+
+/**
+ * \class AvlTree
+ * \brief Basic AVL tree.
+ *
+ * AvlTree is the standard by-structure AVL tree. To use this structure the
+ * user must define an element type and give it the necessary properties. At
+ * the very least it must have a getKey() function that will be used to
+ * compare the relative ordering of elements and tree management data
+ * necessary for the AVL algorithm. An element type can acquire the management
+ * data by inheriting the AvlTreeEl class.
+ *
+ * AvlTree does not presume to manage the allocation of elements in the tree.
+ * The destructor will not delete the items in the tree, instead the elements
+ * must be explicitly de-allocated by the user if necessary and when it is
+ * safe to do so. The empty() routine will traverse the tree and delete all
+ * items.
+ *
+ * Since the tree does not manage the elements, it can contain elements that
+ * are allocated statically or that are part of another data structure.
+ *
+ * \include ex_avltree.cpp
+ */
+
+/*@}*/
+
+#define BASE_EL(name) name
+#define BASEKEY(name) name
+#define AVLMEL_CLASSDEF class Element, class Key, class Compare = CmpOrd<Key>
+#define AVLMEL_TEMPDEF class Element, class Key, class Compare
+#define AVLMEL_TEMPUSE Element, Key, Compare
+#define AvlTree AvlTree /* a macro name is not re-expanded inside its own expansion, so the name stays AvlTree */
+
+#include "avlcommon.h" /* NOTE(review): presumably generates AvlTree from the macros above -- confirm */
+
+#undef BASE_EL /* the configuration macros are removed again so they do not leak out of this header */
+#undef BASEKEY
+#undef AVLMEL_CLASSDEF
+#undef AVLMEL_TEMPDEF
+#undef AVLMEL_TEMPUSE
+#undef AvlTree
+
+#endif /* _AAPL_AVLTREE_H */
diff --git a/aapl/bstcommon.h b/aapl/bstcommon.h
new file mode 100644
index 0000000..bd390cd
--- /dev/null
+++ b/aapl/bstcommon.h
@@ -0,0 +1,814 @@
+/*
+ * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* This header is not wrapped in ifndefs because it is
+ * not intended to be included by users directly. */
+
+#ifdef AAPL_NAMESPACE
+namespace Aapl {
+#endif
+
+/* Binary Search Table: elements live in the underlying vector and are located by binary search on their key. */
+template < BST_TEMPL_DECLARE > class BstTable :
+ public Compare,
+ public Vector< Element, Resize >
+{
+ typedef Vector<Element, Resize> BaseVector;
+ typedef Table<Element> BaseTable;
+
+public:
+ /**
+ * \brief Default constructor.
+ *
+ * Create an empty binary search table.
+ */
+ BstTable() { }
+
+ /**
+ * \brief Construct with initial value.
+ *
+ * Constructs a binary search table with an initial item. Uses the default
+ * constructor for initializing Value.
+ */
+ BstTable(const Key &key)
+ { insert(key); }
+
+#if defined( BSTMAP )
+ /**
+ * \brief Construct with initial value.
+ *
+ * Constructs a binary search table with an initial key/value pair.
+ */
+ BstTable(const Key &key, const Value &val)
+ { insert(key, val); }
+#endif
+
+#if ! defined( BSTSET )
+ /**
+ * \brief Construct with initial value.
+ *
+ * Constructs a binary search table with an initial Element.
+ */
+ BstTable(const Element &el)
+ { insert(el); }
+#endif
+
+ Element *insert(const Key &key, Element **lastFound = 0);
+ Element *insertMulti(const Key &key);
+
+ bool insert(const BstTable &other);
+ void insertMulti(const BstTable &other);
+
+#if defined( BSTMAP )
+ Element *insert(const Key &key, const Value &val,
+ Element **lastFound = 0);
+ Element *insertMulti(const Key &key, const Value &val );
+#endif
+
+#if ! defined( BSTSET )
+ Element *insert(const Element &el, Element **lastFound = 0);
+ Element *insertMulti(const Element &el);
+#endif
+
+ Element *find(const Key &key, Element **lastFound = 0) const;
+ bool findMulti( const Key &key, Element *&lower,
+ Element *&upper ) const;
+
+ bool remove(const Key &key);
+ bool remove(Element *item);
+ long removeMulti(const Key &key);
+ long removeMulti(Element *lower, Element *upper);
+
+ /* The following provide access to the underlying insert and remove
+ * functions that may be hidden by the BST insert and remove. The insertDup
+ * and insertNew functions will never be hidden. They are provided for
+ * consistency. The difference between the non-shared and the shared
+ * tables is the documentation reference to the invoked function. */
+
+#if !defined( SHARED_BST )
+ /*@{*/
+
+ /** \brief Call the insert of the underlying vector.
+ *
+ * Provides access to the vector insert, which may become hidden. Care
+ * should be taken to ensure that after the insert the ordering of
+ * elements is preserved.
+ * Invokes Vector::insert( long pos, const T &val ).
+ */
+ void vinsert(long pos, const Element &val)
+ { Vector< Element, Resize >::insert( pos, &val, 1 ); }
+
+ /** \brief Call the insert of the underlying vector.
+ *
+ * Provides access to the vector insert, which may become hidden. Care
+ * should be taken to ensure that after the insert the ordering of
+ * elements is preserved.
+ * Invokes Vector::insert( long pos, const T *val, long len ).
+ */
+ void vinsert(long pos, const Element *val, long len)
+ { Vector< Element, Resize >::insert( pos, val, len ); }
+
+ /** \brief Call the insert of the underlying vector.
+ *
+ * Provides access to the vector insert, which may become hidden. Care
+ * should be taken to ensure that after the insert the ordering of
+ * elements is preserved.
+ * Invokes Vector::insert( long pos, const Vector &v ).
+ */
+ void vinsert(long pos, const BstTable &v)
+ { Vector< Element, Resize >::insert( pos, v.data, v.tabLen ); }
+
+ /*@}*/
+
+ /*@{*/
+
+ /** \brief Call the remove of the underlying vector.
+ *
+ * Provides access to the vector remove, which may become hidden.
+ * Invokes Vector::remove( long pos ).
+ */
+ void vremove(long pos)
+ { Vector< Element, Resize >::remove( pos, 1 ); }
+
+ /** \brief Call the remove of the underlying vector.
+ *
+ * Provides access to the vector remove, which may become hidden.
+ * Invokes Vector::remove( long pos, long len ).
+ */
+ void vremove(long pos, long len)
+ { Vector< Element, Resize >::remove( pos, len ); }
+
+ /*@}*/
+#else /* SHARED_BST */
+ /*@{*/
+
+ /** \brief Call the insert of the underlying vector.
+ *
+ * Provides access to the vector insert, which may become hidden. Care
+ * should be taken to ensure that after the insert the ordering of
+ * elements is preserved.
+ * Invokes SVector::insert( long pos, const T &val ).
+ */
+ void vinsert(long pos, const Element &val)
+ { Vector< Element, Resize >::insert( pos, &val, 1 ); }
+
+ /** \brief Call the insert of the underlying vector.
+ *
+ * Provides access to the vector insert, which may become hidden. Care
+ * should be taken to ensure that after the insert the ordering of
+ * elements is preserved.
+ * Invokes SVector::insert( long pos, const T *val, long len ).
+ */
+ void vinsert(long pos, const Element *val, long len)
+ { Vector< Element, Resize >::insert( pos, val, len ); }
+
+ /** \brief Call the insert of the underlying vector.
+ *
+ * Provides access to the vector insert, which may become hidden. Care
+ * should be taken to ensure that after the insert the ordering of
+ * elements is preserved.
+ * Invokes SVector::insert( long pos, const SVector &v ).
+ */
+ void vinsert(long pos, const BstTable &v)
+ { Vector< Element, Resize >::insert( pos, v.data, v.length() ); }
+
+ /*@}*/
+
+ /*@{*/
+
+ /** \brief Call the remove of the underlying vector.
+ *
+ * Provides access to the vector remove, which may become hidden.
+ * Invokes SVector::remove( long pos ).
+ */
+ void vremove(long pos)
+ { Vector< Element, Resize >::remove( pos, 1 ); }
+
+ /** \brief Call the remove of the underlying vector.
+ *
+ * Provides access to the vector remove, which may become hidden.
+ * Invokes SVector::remove( long pos, long len ).
+ */
+ void vremove(long pos, long len)
+ { Vector< Element, Resize >::remove( pos, len ); }
+
+ /*@}*/
+
+#endif /* SHARED_BST */
+};
+
+
+#if 0 /* Disabled: both allocLen constructors below are compiled out. */
+#if defined( SHARED_BST )
+/**
+ * \brief Construct a binary search table with an initial amount of
+ * allocation.
+ *
+ * The table is initialized to have room for allocLen elements. The
+ * table starts empty.
+ */
+template <BST_TEMPL_DEF> BstTable<BST_TEMPL_USE>::
+ BstTable( long allocLen )
+{
+ /* Allocate the space if we are given a positive allocLen. */
+ if ( allocLen > 0 ) {
+ /* Allocate the data needed. */
+ STabHead *head = (STabHead*)
+ malloc( sizeof(STabHead) + sizeof(Element) * allocLen );
+ if ( head == 0 )
+ throw std::bad_alloc();
+
+ /* Set up the header and save the data pointer. */
+ head->refCount = 1;
+ head->allocLen = allocLen;
+ head->tabLen = 0;
+ BaseTable::data = (Element*) (head + 1);
+ }
+}
+#else
+/**
+ * \brief Construct a binary search table with an initial amount of
+ * allocation.
+ *
+ * The table is initialized to have room for allocLen elements. The
+ * table starts empty.
+ */
+template <BST_TEMPL_DEF> BstTable<BST_TEMPL_USE>::
+ BstTable( long allocLen )
+{
+ /* Allocate the space if we are given a positive allocLen. */
+ BaseTable::allocLen = allocLen;
+ if ( BaseTable::allocLen > 0 ) {
+ BaseTable::data = (Element*) malloc(sizeof(Element) * BaseTable::allocLen);
+ if ( BaseTable::data == NULL )
+ throw std::bad_alloc();
+ }
+}
+
+#endif /* SHARED_BST */
+#endif /* 0 */
+
+/**
+ * \brief Find the element with the given key and remove it.
+ *
+ * If multiple elements with the given key exist, then it is unspecified which
+ * element will be removed.
+ *
+ * \returns True if an element is found and consequently removed, false
+ * otherwise.
+ */
+template <BST_TEMPL_DEF> bool BstTable<BST_TEMPL_USE>::
+		remove(const Key &key)
+{
+	Element *el = find(key);
+	if ( el == 0 )
+		return false;
+	/* Go through BaseVector: Vector<Element> is a base only for default Resize. */
+	BaseVector::remove(el - BaseTable::data);
+	return true;
+}
+
+/**
+ * \brief Remove the element pointed to by item.
+ *
+ * If item does not point to an element in the table, then undefined behaviour
+ * results. If item is null, then remove has no effect.
+ *
+ * \returns True if item is not null, false otherwise.
+ */
+template <BST_TEMPL_DEF> bool BstTable<BST_TEMPL_USE>::
+		remove( Element *item )
+{
+	if ( item == 0 )
+		return false;
+	/* Qualify with BaseVector so a non-default Resize policy still compiles. */
+	BaseVector::remove(item - BaseTable::data);
+	return true;
+}
+
+/**
+ * \brief Find and remove the entire range of elements with the given key.
+ *
+ * \returns The number of elements removed.
+ */
+template <BST_TEMPL_DEF> long BstTable<BST_TEMPL_USE>::
+		removeMulti(const Key &key)
+{
+	Element *low, *high;
+	if ( ! findMulti(key, low, high) )
+		return 0;
+
+	/* findMulti yields an inclusive range, hence the +1. BaseVector is used
+	 * because Vector<Element> is a base class only for the default Resize. */
+	long num = high - low + 1;
+	BaseVector::remove(low - BaseTable::data, num);
+	return num;
+}
+
+/** \brief Remove the inclusive range [lower, upper]; returns the count removed. */
+template <BST_TEMPL_DEF> long BstTable<BST_TEMPL_USE>::
+		removeMulti(Element *lower, Element *upper)
+{
+	long num = upper - lower + 1;
+	BaseVector::remove(lower - BaseTable::data, num);
+	return num;
+}
+
+
+/**
+ * \brief Find a range of elements with the given key.
+ *
+ * If any elements with the given key exist then low and high are set to the
+ * ends of the continuous range of elements with the key: low points to the
+ * first and high to the last such element. Otherwise they are left untouched.
+ *
+ * \returns True if any elements are found, false otherwise.
+ */
+template <BST_TEMPL_DEF> bool BstTable<BST_TEMPL_USE>::
+		findMulti(const Key &key, Element *&low, Element *&high ) const
+{
+	if ( BaseTable::data == 0 )
+		return false;
+
+	/* Binary search over the half-open range [lower, upper); this avoids ever
+	 * forming a pointer that lies before the start of the table. */
+	const long tblLen = BaseTable::length();
+	const Element *lower = BaseTable::data;
+	const Element *upper = BaseTable::data + tblLen;
+	while ( lower < upper ) {
+		const Element *mid = lower + ((upper-lower-1)>>1);
+		/* Qualified so the call resolves in the dependent base Compare. */
+		long keyRelation = Compare::compare(key, GET_KEY(*mid));
+
+		if ( keyRelation < 0 )
+			upper = mid;
+		else if ( keyRelation > 0 )
+			lower = mid + 1;
+		else {
+			const Element *first = BaseTable::data;
+			const Element *last = BaseTable::data + tblLen - 1;
+
+			/* Widen downwards while the preceding element matches. */
+			lower = mid;
+			while ( lower != first &&
+					Compare::compare(key, GET_KEY(*(lower-1))) == 0 )
+				lower--;
+
+			/* Widen upwards while the following element matches. */
+			upper = mid;
+			while ( upper != last &&
+					Compare::compare(key, GET_KEY(*(upper+1))) == 0 )
+				upper++;
+
+			low = (Element*)lower;
+			high = (Element*)upper;
+			return true;
+		}
+	}
+
+	/* Did not find the key in the table. */
+	return false;
+}
+
+/**
+ * \brief Find an element with the given key.
+ *
+ * If the find succeeds then lastFound is set to the element found. If the
+ * find fails then lastFound is set to the location where the key would be
+ * inserted. If there is more than one element in the table with the given
+ * key, then it is unspecified which element is returned as the match.
+ *
+ * If the table has no storage allocated then null is returned and lastFound
+ * is left untouched.
+ *
+ * \returns The element found on success, null on failure.
+ */
+template <BST_TEMPL_DEF> Element *BstTable<BST_TEMPL_USE>::
+		find( const Key &key, Element **lastFound ) const
+{
+	if ( BaseTable::data == 0 )
+		return 0;
+
+	/* Binary search over the half-open range [lower, upper); this avoids ever
+	 * forming a pointer that lies before the start of the table. */
+	const Element *lower = BaseTable::data;
+	const Element *upper = BaseTable::data + BaseTable::length();
+	while ( lower < upper ) {
+		const Element *mid = lower + ((upper-lower-1)>>1);
+		/* Qualified so the call resolves in the dependent base Compare. */
+		long keyRelation = Compare::compare(key, GET_KEY(*mid));
+
+		if ( keyRelation < 0 )
+			upper = mid;
+		else if ( keyRelation > 0 )
+			lower = mid + 1;
+		else {
+			/* Key is found. Last found gets the found record. */
+			if ( lastFound != 0 )
+				*lastFound = (Element*)mid;
+			return (Element*)mid;
+		}
+	}
+
+	/* Did not find the key. Last found gets the insert location. */
+	if ( lastFound != 0 )
+		*lastFound = (Element*)lower;
+	return 0;
+}
+
+/**
+ * \brief Insert a new element constructed from the given key.
+ *
+ * If the key does not already exist in the table then a new element is
+ * constructed in place from the key. If lastFound is given, it is set to the
+ * new element created. If the insert fails then lastFound is set to the
+ * existing element of the same key.
+ *
+ * \returns The new element created upon success, null upon failure.
+ */
+template <BST_TEMPL_DEF> Element *BstTable<BST_TEMPL_USE>::
+		insert(const Key &key, Element **lastFound)
+{
+	/* Binary search over the half-open range [lower, upper). An empty table
+	 * yields an empty range, so it needs no special case, and no pointer
+	 * before the start of the table is ever formed. */
+	const Element *lower = BaseTable::data;
+	const Element *upper = BaseTable::data + BaseTable::length();
+
+	while ( lower < upper ) {
+		const Element *mid = lower + ((upper-lower-1)>>1);
+
+		/* Qualified so the call resolves in the dependent base Compare. */
+		long keyRelation = Compare::compare(key, GET_KEY(*mid));
+
+		if ( keyRelation < 0 )
+			upper = mid;
+		else if ( keyRelation > 0 )
+			lower = mid + 1;
+		else {
+			/* The key exists already. Report the holder and fail. */
+			if ( lastFound != 0 )
+				*lastFound = (Element*)mid;
+			return 0;
+		}
+	}
+
+	/* Did not find the key. The place to insert at is lower. */
+	long insertPos = lower - BaseTable::data;
+
+	/* Do the insert. After makeRawSpaceFor, lower pointer is no good. */
+	BaseVector::makeRawSpaceFor(insertPos, 1);
+	new(BaseTable::data + insertPos) Element(key);
+
+	/* Set lastFound */
+	if ( lastFound != 0 )
+		*lastFound = BaseTable::data + insertPos;
+	return BaseTable::data + insertPos;
+}
+
+
+/**
+ * \brief Insert a new element constructed from the given key even if the key
+ * exists already.
+ *
+ * If the key exists already then the new element is placed next to some
+ * other element of the same key. InsertMulti cannot fail. The element is
+ * constructed in place from the key.
+ *
+ * \returns The new element created.
+ */
+template <BST_TEMPL_DEF> Element *BstTable<BST_TEMPL_USE>::
+		insertMulti(const Key &key)
+{
+	/* Binary search over the half-open range [lower, upper). An empty table
+	 * yields an empty range, so it needs no special case, and no pointer
+	 * before the start of the table is ever formed. */
+	const Element *lower = BaseTable::data;
+	const Element *upper = BaseTable::data + BaseTable::length();
+
+	while ( lower < upper ) {
+		const Element *mid = lower + ((upper-lower-1)>>1);
+
+		/* Qualified so the call resolves in the dependent base Compare. */
+		long keyRelation = Compare::compare(key, GET_KEY(*mid));
+
+		if ( keyRelation < 0 )
+			upper = mid;
+		else if ( keyRelation > 0 )
+			lower = mid + 1;
+		else {
+			/* The key exists already. Insert in front of the match. */
+			lower = mid;
+			break;
+		}
+	}
+
+	/* Get the insert pos. Whether or not the key was found, it is lower. */
+	long insertPos = lower - BaseTable::data;
+
+	/* Do the insert. After makeRawSpaceFor, lower pointer is no good. */
+	BaseVector::makeRawSpaceFor(insertPos, 1);
+	new(BaseTable::data + insertPos) Element(key);
+
+	/* Return the element inserted. */
+	return BaseTable::data + insertPos;
+}
+
+/**
+ * \brief Insert every element of another table into this one.
+ *
+ * The whole of other is always walked: a failed insert of one item does not
+ * stop the remaining items from being attempted.
+ *
+ * \returns True if every item was inserted, false if at least one insert
+ * failed.
+ */
+template <BST_TEMPL_DEF> bool BstTable<BST_TEMPL_USE>::
+		insert(const BstTable &other)
+{
+	bool everyInserted = true;
+	const long count = other.length();
+	for ( long idx = 0; idx < count; ++idx ) {
+		/* A null result is a failed insert; remember it and carry on. */
+		if ( insert( other.data[idx] ) == 0 )
+			everyInserted = false;
+	}
+	return everyInserted;
+}
+
+/**
+ * \brief Insert every element of another table, duplicates included.
+ *
+ * None of the individual insertMulti calls can fail.
+ */
+template <BST_TEMPL_DEF> void BstTable<BST_TEMPL_USE>::
+		insertMulti(const BstTable &other)
+{
+	/* The length of other is read once, before the first insert. */
+	for ( long idx = 0, count = other.length(); idx < count; ++idx )
+		insertMulti( other.data[idx] );
+}
+
+#if ! defined( BSTSET )
+
+/**
+ * \brief Insert the given element.
+ *
+ * If the key in the given element does not already exist in the table then a
+ * new element is inserted. The element copy constructor is used to place the
+ * element into the table. If lastFound is given, it is set to the new element
+ * created. If the insert fails then lastFound is set to the existing element
+ * of the same key.
+ *
+ * \returns The new element created upon success, null upon failure.
+ */
+template <BST_TEMPL_DEF> Element *BstTable<BST_TEMPL_USE>::
+		insert(const Element &el, Element **lastFound )
+{
+	const long tblLen = BaseTable::length();
+
+	/* Binary search over the half-open range [lower, upper). An empty table
+	 * yields an empty range, so it needs no special case, and no pointer
+	 * before the start of the table is ever formed. */
+	const Element *lower = BaseTable::data;
+	const Element *upper = BaseTable::data + tblLen;
+
+	while ( lower < upper ) {
+		/* Same probe position as a closed-range search would choose. */
+		const Element *mid = lower + ((upper-lower-1)>>1);
+
+		/* Qualified so the call resolves in the dependent base Compare. */
+		long keyRelation = Compare::compare(GET_KEY(el), GET_KEY(*mid));
+
+		if ( keyRelation < 0 ) {
+			/* The key sorts before mid: continue in the lower part. */
+			upper = mid;
+		}
+		else if ( keyRelation > 0 ) {
+			/* The key sorts after mid: continue in the upper part. */
+			lower = mid + 1;
+		}
+		else {
+			/* The key exists already. Report the holder and fail. */
+			if ( lastFound != 0 )
+				*lastFound = (Element*)mid;
+			return 0;
+		}
+	}
+
+	/* Did not find the key. The place to insert at is lower. It is turned
+	 * into an index since the pointer is no good after makeRawSpaceFor. */
+	long insertPos = lower - BaseTable::data;
+
+	/* Do the insert: placement new copy constructs el in the space made. */
+	BaseVector::makeRawSpaceFor(insertPos, 1);
+	new(BaseTable::data + insertPos) Element(el);
+
+	/* Set lastFound */
+	if ( lastFound != 0 )
+		*lastFound = BaseTable::data + insertPos;
+
+	/* Return the element inserted. */
+	return BaseTable::data + insertPos;
+}
+
+/**
+ * \brief Insert the given element even if it exists already.
+ *
+ * If the key in the given element exists already then the new element is
+ * placed next to some other element of the same key. InsertMulti cannot fail.
+ * The element copy constructor is used to place the element in the table.
+ *
+ * \returns The new element created.
+ */
+template <BST_TEMPL_DEF> Element *BstTable<BST_TEMPL_USE>::
+		insertMulti(const Element &el)
+{
+	const long tblLen = BaseTable::length();
+
+	/* Binary search over the half-open range [lower, upper). An empty table
+	 * yields an empty range, so it needs no special case, and no pointer
+	 * before the start of the table is ever formed. */
+	const Element *lower = BaseTable::data;
+	const Element *upper = BaseTable::data + tblLen;
+
+	while ( lower < upper ) {
+		/* Same probe position as a closed-range search would choose. */
+		const Element *mid = lower + ((upper-lower-1)>>1);
+
+		/* Qualified so the call resolves in the dependent base Compare. */
+		long keyRelation = Compare::compare(GET_KEY(el), GET_KEY(*mid));
+
+		if ( keyRelation < 0 ) {
+			/* The key sorts before mid: continue in the lower part. */
+			upper = mid;
+		}
+		else if ( keyRelation > 0 ) {
+			/* The key sorts after mid: continue in the upper part. */
+			lower = mid + 1;
+		}
+		else {
+			/* The key exists already. InsertMulti does not fail on that, the
+			 * new element goes in directly in front of the match. */
+			lower = mid;
+			break;
+		}
+	}
+
+	/* Get the insert pos: lower, whether or not the key was found. It is
+	 * an index since the pointer is no good after makeRawSpaceFor. */
+	long insertPos = lower - BaseTable::data;
+
+	/* Do the insert: placement new copy constructs el in the space made
+	 * at insertPos. */
+	BaseVector::makeRawSpaceFor(insertPos, 1);
+	new(BaseTable::data + insertPos) Element(el);
+
+	/* Return the element inserted. */
+	return BaseTable::data + insertPos;
+}
+#endif
+
+
+#if defined( BSTMAP )
+
+/**
+ * \brief Insert the given key-value pair.
+ *
+ * The pair is inserted only when the key is not yet present in the table.
+ * Copy constructors are used to place the pair in the table. When lastFound
+ * is supplied it receives the new entry, or, if the insert fails, the
+ * existing pair that already holds the key.
+ *
+ * \returns The new element created upon success, null upon failure.
+ */
+template <BST_TEMPL_DEF> Element *BstTable<BST_TEMPL_USE>::
+		insert(const Key &key, const Value &val, Element **lastFound)
+{
+	const long tblLen = BaseTable::length();
+
+	/* Binary search over the half-open range [lower, upper). An empty table
+	 * yields an empty range and falls straight through to the insert. */
+	const Element *lower = BaseTable::data;
+	const Element *upper = BaseTable::data + tblLen;
+
+	while ( lower < upper ) {
+		/* Same probe position as a closed-range search would choose. */
+		const Element *mid = lower + ((upper-lower-1)>>1);
+
+		/* Negative, zero or positive according to the key's ordering. */
+		long keyRelation = Compare::compare(key, mid->key);
+
+		if ( keyRelation < 0 ) {
+			/* The key sorts before mid: continue in the lower part. */
+			upper = mid;
+		}
+		else if ( keyRelation > 0 ) {
+			/* The key sorts after mid: continue in the upper part. */
+			lower = mid + 1;
+		}
+		else {
+			/* The key exists already. Report the holder and fail. */
+			if ( lastFound != 0 )
+				*lastFound = (Element*)mid;
+			return 0;
+		}
+	}
+
+	/* Did not find the key. The place to insert at is lower. It is turned
+	 * into an index since the pointer is no good after makeRawSpaceFor. */
+	long insertPos = lower - BaseTable::data;
+
+	/* Do the insert: placement new constructs the pair from key and val. */
+	BaseVector::makeRawSpaceFor(insertPos, 1);
+	new(BaseTable::data + insertPos) Element(key, val);
+
+	/* Set lastFound */
+	if ( lastFound != 0 )
+		*lastFound = BaseTable::data + insertPos;
+
+	/* Return the element inserted. */
+	return BaseTable::data + insertPos;
+}
+
+
+/**
+ * \brief Insert the given key-value pair even if the key exists already.
+ *
+ * When the key is already present the new pair is placed next to some other
+ * pair of the same key. InsertMulti cannot fail. Copy constructors are used
+ * to place the pair in the table.
+ *
+ * \returns The new element created.
+ */
+template <BST_TEMPL_DEF> Element *BstTable<BST_TEMPL_USE>::
+		insertMulti(const Key &key, const Value &val)
+{
+	const long tblLen = BaseTable::length();
+
+	/* Binary search over the half-open range [lower, upper). An empty table
+	 * yields an empty range and falls straight through to the insert, so
+	 * it needs no special case. */
+	const Element *lower = BaseTable::data;
+	const Element *upper = BaseTable::data + tblLen;
+
+	while ( lower < upper ) {
+		/* Same probe position as a closed-range search would choose. */
+		const Element *mid = lower + ((upper-lower-1)>>1);
+
+		/* Negative, zero or positive according to the key's ordering. */
+		long keyRelation = Compare::compare(key, mid->key);
+
+		if ( keyRelation < 0 ) {
+			/* The key sorts before mid: continue in the lower part. */
+			upper = mid;
+		}
+		else if ( keyRelation > 0 ) {
+			/* The key sorts after mid: continue in the upper part. */
+			lower = mid + 1;
+		}
+		else {
+			/* The key exists already. InsertMulti does not fail on that, the
+			 * new pair goes in directly in front of the match. */
+			lower = mid;
+			break;
+		}
+	}
+
+	/* Get the insert pos: lower, whether or not the key was found. It is
+	 * an index since the pointer is no good after makeRawSpaceFor. */
+	long insertPos = lower - BaseTable::data;
+
+	/* Do the insert: placement new constructs the pair from key and val
+	 * at insertPos. */
+	BaseVector::makeRawSpaceFor(insertPos, 1);
+	new(BaseTable::data + insertPos) Element(key, val);
+
+	/* Return the element inserted. */
+	return BaseTable::data + insertPos;
+}
+
+#endif
+
+#ifdef AAPL_NAMESPACE
+}
+#endif
diff --git a/aapl/bstmap.h b/aapl/bstmap.h
new file mode 100644
index 0000000..5154b86
--- /dev/null
+++ b/aapl/bstmap.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_BSTMAP_H
+#define _AAPL_BSTMAP_H
+
+#include "compare.h"
+#include "vector.h"
+
+#ifdef AAPL_NAMESPACE
+namespace Aapl {
+#endif
+
+/**
+ * \brief Element for BstMap.
+ *
+ * Holds one key and value pair.
+ */
+template <class Key, class Value> struct BstMapEl
+{
+	/* Leaves both key and value default-initialized. */
+	BstMapEl()
+		{ }
+
+	/* Copies the key in; the value is default-initialized. */
+	BstMapEl(const Key &k)
+		: key(k) { }
+
+	/* Copies both the key and the value in. */
+	BstMapEl(const Key &k, const Value &v)
+		: key(k), value(v) { }
+
+	/** \brief The key */
+	Key key;
+
+	/** \brief The value. */
+	Value value;
+};
+
+#ifdef AAPL_NAMESPACE
+}
+#endif
+
+/**
+ * \addtogroup bst
+ * @{
+ */
+
+/**
+ * \class BstMap
+ * \brief Binary search table for key and value pairs.
+ *
+ * BstMap stores key and value pairs in each element. The key and value can be
+ * any type. A compare class for the key must be supplied.
+ */
+
+/*@}*/
+
+/* Generate BstMap from the shared implementation in bstcommon.h. The macros
+ * below supply the template parameter lists, the class name (BstTable) and
+ * the element type (Element) that bstcommon.h is written in terms of. Here
+ * the element is a BstMapEl and GET_KEY(el) reads its key member. All of the
+ * macros are undefined again after the include so they do not leak into the
+ * including file. */
+#define BST_TEMPL_DECLARE class Key, class Value, \
+ class Compare = CmpOrd<Key>, class Resize = ResizeExpn
+#define BST_TEMPL_DEF class Key, class Value, class Compare, class Resize
+#define BST_TEMPL_USE Key, Value, Compare, Resize
+#define GET_KEY(el) ((el).key)
+#define BstTable BstMap
+#define Element BstMapEl<Key, Value>
+#define BSTMAP
+
+#include "bstcommon.h"
+
+#undef BST_TEMPL_DECLARE
+#undef BST_TEMPL_DEF
+#undef BST_TEMPL_USE
+#undef GET_KEY
+#undef BstTable
+#undef Element
+#undef BSTMAP
+
+/**
+ * \fn BstMap::insert(const Key &key, BstMapEl<Key, Value> **lastFound)
+ * \brief Insert the given key.
+ *
+ * If the given key does not already exist in the table then a new element
+ * having key is inserted. The key copy constructor and value default
+ * constructor are used to place the pair in the table. If lastFound is given,
+ * it is set to the new entry created. If the insert fails then lastFound is
+ * set to the existing pair of the same key.
+ *
+ * \returns The new element created upon success, null upon failure.
+ */
+
+/**
+ * \fn BstMap::insertMulti(const Key &key)
+ * \brief Insert the given key even if it exists already.
+ *
+ * If the key exists already then the new element having key is placed next
+ * to some other pair of the same key. InsertMulti cannot fail. The key copy
+ * constructor and the value default constructor are used to place the pair in
+ * the table.
+ *
+ * \returns The new element created.
+ */
+
+#endif /* _AAPL_BSTMAP_H */
diff --git a/aapl/bstset.h b/aapl/bstset.h
new file mode 100644
index 0000000..ce710ee
--- /dev/null
+++ b/aapl/bstset.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_BSTSET_H
+#define _AAPL_BSTSET_H
+
+/**
+ * \addtogroup bst
+ * @{
+ */
+
+/**
+ * \class BstSet
+ * \brief Binary search table for types that are the key.
+ *
+ * BstSet is suitable for types that comprise the entire key. Rather than look
+ * into the element to retrieve the key, the element is the key. A class that
+ * contains a comparison routine for the key must be given.
+ */
+
+/*@}*/
+
+#include "compare.h"
+#include "vector.h"
+
+/* Generate BstSet from the shared implementation in bstcommon.h. For a set
+ * the element is the key itself: Element expands to Key and GET_KEY(el)
+ * yields the element unchanged. All of the macros are undefined again after
+ * the include so they do not leak into the including file. */
+#define BST_TEMPL_DECLARE class Key, class Compare = CmpOrd<Key>, \
+ class Resize = ResizeExpn
+#define BST_TEMPL_DEF class Key, class Compare, class Resize
+#define BST_TEMPL_USE Key, Compare, Resize
+#define GET_KEY(el) (el)
+#define BstTable BstSet
+#define Element Key
+#define BSTSET
+
+#include "bstcommon.h"
+
+#undef BST_TEMPL_DECLARE
+#undef BST_TEMPL_DEF
+#undef BST_TEMPL_USE
+#undef GET_KEY
+#undef BstTable
+#undef Element
+#undef BSTSET
+
+/**
+ * \fn BstSet::insert(const Key &key, Key **lastFound)
+ * \brief Insert the given key.
+ *
+ * If the given key does not already exist in the table then it is inserted.
+ * The key's copy constructor is used to place the item in the table. If
+ * lastFound is given, it is set to the new entry created. If the insert fails
+ * then lastFound is set to the existing key of the same value.
+ *
+ * \returns The new element created upon success, null upon failure.
+ */
+
+/**
+ * \fn BstSet::insertMulti(const Key &key)
+ * \brief Insert the given key even if it exists already.
+ *
+ * If the key exists already then it is placed next to some other key of the
+ * same value. InsertMulti cannot fail. The key's copy constructor is used to
+ * place the item in the table.
+ *
+ * \returns The new element created.
+ */
+
+#endif /* _AAPL_BSTSET_H */
diff --git a/aapl/bsttable.h b/aapl/bsttable.h
new file mode 100644
index 0000000..9898ebf
--- /dev/null
+++ b/aapl/bsttable.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_BSTTABLE_H
+#define _AAPL_BSTTABLE_H
+
+#include "compare.h"
+#include "vector.h"
+
+/**
+ * \addtogroup bst
+ * @{
+ */
+
+/**
+ * \class BstTable
+ * \brief Binary search table for structures that contain a key.
+ *
+ * This is the basic binary search table. It can be used to contain a
+ * structure that has a key and possibly some data. The key should be a member
+ * of the element class and accessible with getKey(). A class containing the
+ * compare routine must be supplied.
+ */
+
+/*@}*/
+
+/* Generate BstTable from the shared implementation in bstcommon.h. Unlike
+ * the map and set headers, Element is a template parameter here and the
+ * class keeps the name BstTable, so neither is defined as a macro.
+ * GET_KEY(el) reads the key through the element's getKey() member. All of
+ * the macros are undefined again after the include. */
+#define BST_TEMPL_DECLARE class Element, class Key, \
+ class Compare = CmpOrd<Key>, class Resize = ResizeExpn
+#define BST_TEMPL_DEF class Element, class Key, class Compare, class Resize
+#define BST_TEMPL_USE Element, Key, Compare, Resize
+#define GET_KEY(el) ((el).getKey())
+#define BSTTABLE
+
+#include "bstcommon.h"
+
+#undef BST_TEMPL_DECLARE
+#undef BST_TEMPL_DEF
+#undef BST_TEMPL_USE
+#undef GET_KEY
+#undef BSTTABLE
+
+/**
+ * \fn BstTable::insert(const Key &key, Element **lastFound)
+ * \brief Insert a new element with the given key.
+ *
+ * If the given key does not already exist in the table a new element is
+ * inserted with the given key. A constructor taking only const Key& is used
+ * to initialize the new element. If lastFound is given, it is set to the new
+ * element created. If the insert fails then lastFound is set to the existing
+ * element with the same key.
+ *
+ * \returns The new element created upon success, null upon failure.
+ */
+
+/**
+ * \fn BstTable::insertMulti(const Key &key)
+ * \brief Insert a new element even if the key exists already.
+ *
+ * If the key exists already then the new element is placed next to some
+ * element with the same key. InsertMulti cannot fail. A constructor taking
+ * only const Key& is used to initialize the new element.
+ *
+ * \returns The new element created.
+ */
+
+#endif /* _AAPL_BSTTABLE_H */
diff --git a/aapl/bubblesort.h b/aapl/bubblesort.h
new file mode 100644
index 0000000..20e0f6f
--- /dev/null
+++ b/aapl/bubblesort.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_BUBBLESORT_H
+#define _AAPL_BUBBLESORT_H
+
+#ifdef AAPL_NAMESPACE
+namespace Aapl {
+#endif
+
+/**
+ * \addtogroup sort
+ * @{
+ */
+
+/**
+ * \class BubbleSort
+ * \brief Bubble sort an array of data.
+ *
+ * BubbleSort can be used to sort any array of objects of type T provided a
+ * compare class is given. BubbleSort is in-place. It does not require any
+ * temporary storage.
+ *
+ * Objects are not made aware that they are being moved around in memory.
+ * Assignment operators, constructors and destructors are never invoked by the
+ * sort.
+ *
+ * BubbleSort runs in O(n^2) time. It is most useful when sorting arrays that
+ * are nearly sorted. It is best when neighbouring pairs are out of place.
+ * BubbleSort is a stable sort, meaning that objects with the same key have
+ * their relative ordering preserved.
+ */
+
+/*@}*/
+
+/* BubbleSort. Inherits from Compare so that the compare routine, static or
+ * not, is reachable through the sorter object itself. */
+template <class T, class Compare> class BubbleSort
+	: public Compare
+{
+public:
+	/* Sorting interface routine. */
+	void sort(T *data, long len);
+};
+
+
+/**
+ * \brief Bubble sort an array of data.
+ *
+ * \param data The array to sort in place.
+ * \param len The number of elements in data. Nothing is done when len is
+ * less than two.
+ *
+ * Neighbouring elements are exchanged whenever compare() reports the first as
+ * greater than the second. The exchange copies raw bytes with memcpy, so no
+ * constructor, destructor or assignment operator of T runs. A pass that makes
+ * no exchange ends the sort early.
+ *
+ * NOTE(review): this header has no #include of its own; memcpy is expected to
+ * be declared (via <string.h>) by whoever includes it.
+ */
+template <class T, class Compare> void BubbleSort<T,Compare>::
+		sort(T *data, long len)
+{
+	bool changed = true;
+	for ( long pass = 1; changed && pass < len; pass ++ ) {
+		changed = false;
+		for ( long i = 0; i < len-pass; i++ ) {
+			/* Do we swap pos with the next one? The compare routine is a
+			 * member of the dependent base class Compare, so it has to be
+			 * reached through this->; an unqualified call is not looked up
+			 * in a dependent base and fails to compile for built-in T. */
+			if ( this->compare( data[i], data[i+1] ) > 0 ) {
+				char tmp[sizeof(T)];
+
+				/* Swap the two items. */
+				memcpy( tmp, data+i, sizeof(T) );
+				memcpy( data+i, data+i+1, sizeof(T) );
+				memcpy( data+i+1, tmp, sizeof(T) );
+
+				/* Note that we made a change. */
+				changed = true;
+			}
+		}
+	}
+}
+
+#ifdef AAPL_NAMESPACE
+}
+#endif
+
+#endif /* _AAPL_BUBBLESORT_H */
diff --git a/aapl/compare.h b/aapl/compare.h
new file mode 100644
index 0000000..e537736
--- /dev/null
+++ b/aapl/compare.h
@@ -0,0 +1,260 @@
+/*
+ * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_COMPARE_H
+#define _AAPL_COMPARE_H
+
+#include <string.h>
+#include "table.h"
+
+#ifdef AAPL_NAMESPACE
+namespace Aapl {
+#endif
+
+/**
+ * \defgroup compare Compare
+ * \brief Basic compare classes.
+ *
+ * Compare classes are used by data structures that need to know the relative
+ * ordering of elements. To become a compare class, a class must implement a
+ * routine long compare(const T &key1, const T &key2) that behaves just like
+ * strcmp.
+ *
+ * Compare classes are passed to the template data structure as a template
+ * parameter and are inherited. In most cases the compare routine will base
+ * the key comparison only on the two keys and the compare routine can
+ * therefore be static. Though sometimes it is useful to include data in the
+ * compare class and use this data in the comparison. For example the compare
+ * class may contain a pointer to some other data structure to which the
+ * comparison is delegated.
+ *
+ * @{
+ */
+
+/**
+ * \brief Compare two null terminated character sequences.
+ *
+ * This comparison class is a thin wrapper around strcmp.
+ */
+struct CmpStr
+{
+	/**
+	 * \brief Compare two null terminated string types.
+	 *
+	 * \returns The result of strcmp(k1, k2), widened to long.
+	 */
+	static inline long compare(const char *k1, const char *k2)
+	{
+		const long ordering = strcmp(k1, k2);
+		return ordering;
+	}
+};
+
+/**
+ * \brief Compare a type for which < and > are implemented.
+ *
+ * CmpOrd suits simple types such as integers and pointers, which have the
+ * less-than and greater-than operators defined by default.
+ */
+template <class T> struct CmpOrd
+{
+	/**
+	 * \brief Compare two ordinal types.
+	 *
+	 * This compare routine copies its arguments in by value.
+	 *
+	 * \returns -1 if k1 < k2, 1 if k1 > k2, and 0 when neither holds.
+	 */
+	static inline long compare(const T k1, const T k2)
+	{
+		/* Less-than is tested first; greater-than only if it fails. */
+		return ( k1 < k2 ) ? -1 : ( ( k1 > k2 ) ? 1 : 0 );
+	}
+};
+
+/**
+ * \brief Compare two tables of type T
+ *
+ * Table comparison is useful for keying a data structure on a vector or
+ * binary search table. T is the element type stored in the table. CompareT
+ * is the comparison structure used to compare the individual values in the
+ * table.
+ */
+template < class T, class CompareT = CmpOrd<T> > struct CmpTable
+	: public CompareT
+{
+	/**
+	 * \brief Compare two tables storing type T.
+	 *
+	 * A shorter table orders before a longer one. Tables of equal length
+	 * are ordered by the first pair of items that CompareT reports as
+	 * different.
+	 */
+	static inline long compare(const Table<T> &t1, const Table<T> &t2)
+	{
+		/* Differing lengths settle the ordering on their own. */
+		if ( t1.tabLen < t2.tabLen )
+			return -1;
+		if ( t1.tabLen > t2.tabLen )
+			return 1;
+
+		/* Same length: walk both tables in step. */
+		T *items1 = t1.data, *items2 = t2.data;
+		const long len = t1.tabLen;
+		for ( long pos = 0; pos < len; pos++ ) {
+			const long cmpResult = CompareT::compare(items1[pos], items2[pos]);
+			if ( cmpResult != 0 )
+				return cmpResult;
+		}
+		return 0;
+	}
+};
+
+/**
+ * \brief Compare two tables of type T -- non-static version.
+ *
+ * CmpTableNs does the same comparison as CmpTable, but its compare routine
+ * is non-static. Use this version when the CompareT class has a non-static
+ * compare, because a static member cannot invoke a non-static member.
+ *
+ * Table comparison is useful for keying a data structure on a vector or binary
+ * search table. T is the element type stored in the table. CompareT is the
+ * comparison structure used to compare the individual values in the table.
+ */
+template < class T, class CompareT = CmpOrd<T> > struct CmpTableNs
+	: public CompareT
+{
+	/**
+	 * \brief Compare two tables storing type T.
+	 *
+	 * A shorter table orders before a longer one. Tables of equal length
+	 * are ordered by the first pair of items that CompareT reports as
+	 * different.
+	 */
+	inline long compare(const Table<T> &t1, const Table<T> &t2)
+	{
+		/* Differing lengths settle the ordering on their own. */
+		if ( t1.tabLen < t2.tabLen )
+			return -1;
+		if ( t1.tabLen > t2.tabLen )
+			return 1;
+
+		/* Same length: walk both tables in step. */
+		T *items1 = t1.data, *items2 = t2.data;
+		const long len = t1.tabLen;
+		for ( long pos = 0; pos < len; pos++ ) {
+			const long cmpResult = CompareT::compare(items1[pos], items2[pos]);
+			if ( cmpResult != 0 )
+				return cmpResult;
+		}
+		return 0;
+	}
+};
+
+/**
+ * \brief Compare two implicitly shared tables of type T
+ *
+ * This table comparison is for data structures based on implicitly
+ * shared tables.
+ *
+ * Table comparison is useful for keying a data structure on a vector or
+ * binary search table. T is the element type stored in the table. CompareT
+ * is the comparison structure used to compare the individual values in the
+ * table.
+ */
+template < class T, class CompareT = CmpOrd<T> > struct CmpSTable : public CompareT
+{
+	/**
+	 * \brief Compare two tables storing type T.
+	 *
+	 * A shorter table orders before a longer one. Tables of equal length
+	 * are ordered by the first pair of items that CompareT reports as
+	 * different.
+	 */
+	static inline long compare(const STable<T> &t1, const STable<T> &t2)
+	{
+		const long len1 = t1.length();
+		const long len2 = t2.length();
+
+		/* Differing lengths settle the ordering on their own. */
+		if ( len1 < len2 )
+			return -1;
+		if ( len1 > len2 )
+			return 1;
+
+		/* Same length: compare the table data item by item. */
+		T *items1 = t1.data, *items2 = t2.data;
+		for ( long pos = 0; pos < len1; pos++ ) {
+			const long cmpResult = CompareT::compare(items1[pos], items2[pos]);
+			if ( cmpResult != 0 )
+				return cmpResult;
+		}
+		return 0;
+	}
+};
+
+/**
+ * \brief Compare two implicitly shared tables of type T -- non-static
+ * version.
+ *
+ * This is a non-static table comparison for data structures based on
+ * implicitly shared tables. Use this version when the CompareT class has a
+ * non-static compare, because a static member cannot invoke a non-static
+ * member.
+ *
+ * Table comparison is useful for keying a data structure on a vector or
+ * binary search table. T is the element type stored in the table. CompareT
+ * is the comparison structure used to compare the individual values in the
+ * table.
+ */
+template < class T, class CompareT = CmpOrd<T> > struct CmpSTableNs
+	: public CompareT
+{
+	/**
+	 * \brief Compare two tables storing type T.
+	 *
+	 * A shorter table orders before a longer one. Tables of equal length
+	 * are ordered by the first pair of items that CompareT reports as
+	 * different.
+	 */
+	inline long compare(const STable<T> &t1, const STable<T> &t2)
+	{
+		const long len1 = t1.length();
+		const long len2 = t2.length();
+
+		/* Differing lengths settle the ordering on their own. */
+		if ( len1 < len2 )
+			return -1;
+		if ( len1 > len2 )
+			return 1;
+
+		/* Same length: compare the table data item by item. */
+		T *items1 = t1.data, *items2 = t2.data;
+		for ( long pos = 0; pos < len1; pos++ ) {
+			const long cmpResult = CompareT::compare(items1[pos], items2[pos]);
+			if ( cmpResult != 0 )
+				return cmpResult;
+		}
+		return 0;
+	}
+};
+
+
+/*@}*/
+
+#ifdef AAPL_NAMESPACE
+}
+#endif
+
+#endif /* _AAPL_COMPARE_H */
diff --git a/aapl/dlcommon.h b/aapl/dlcommon.h
new file mode 100644
index 0000000..5ce9bd3
--- /dev/null
+++ b/aapl/dlcommon.h
@@ -0,0 +1,790 @@
+/*
+ * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* This header is not wrapped in ifndef because it is not intended to
+ * be included by the user. */
+
+#ifdef AAPL_NAMESPACE
+namespace Aapl {
+#endif
+
+#if defined( DOUBLELIST_VALUE )
+/**
+ * \brief Double list element for DListVal.
+ *
+ * DListValEl holds the DListVal type T by value, together with the links to
+ * the neighbouring elements.
+ */
+template <class T> struct DListValEl
+{
+	/**
+	 * \brief Construct a DListValEl with a given value.
+	 *
+	 * This is the only constructor declared, so an element is never created
+	 * without the user supplying its value. The value is copied in using
+	 * T's copy constructor. The prev and next pointers are left unset.
+	 */
+	DListValEl( const T &initial )
+		: value(initial)
+	{
+	}
+
+	/**
+	 * \brief Value stored by the list element.
+	 *
+	 * The value is always copied into a new list element using the copy
+	 * constructor.
+	 */
+	T value;
+
+	/**
+	 * \brief List previous pointer.
+	 *
+	 * Points to the previous item in the list. If this is the first item in
+	 * the list, then prev is NULL. If this element is not in a list then
+	 * prev is undefined.
+	 */
+	DListValEl<T> *prev;
+
+	/**
+	 * \brief List next pointer.
+	 *
+	 * Points to the next item in the list. If this is the last item in the
+	 * list, then next is NULL. If this element is not in a list then next is
+	 * undefined.
+	 */
+	DListValEl<T> *next;
+};
+#else
+
+#ifndef __AAPL_DOUBLE_LIST_EL
+#define __AAPL_DOUBLE_LIST_EL
+/**
+ * \brief Double list element properties.
+ *
+ * This class can be inherited to make a class suitable to be a double list
+ * element. It simply provides the next and previous pointers. An alternative
+ * is to put the next and previous pointers in the class directly.
+ *
+ * No constructor is declared, so neither pointer is initialized here.
+ */
+template <class Element> struct DListEl
+{
+ /**
+ * \brief List previous pointer.
+ *
+ * Points to the previous item in the list. If this is the first item in
+ * the list, then prev is NULL. If this element is not in a list then
+ * prev is undefined.
+ */
+ Element *prev;
+
+ /**
+ * \brief List next pointer.
+ *
+ * Points to the next item in the list. If this is the last item in the
+ * list, then next is NULL. If this element is not in a list then next is
+ * undefined.
+ */
+ Element *next;
+};
+#endif /* __AAPL_DOUBLE_LIST_EL */
+
+#endif
+
+/* Doubly Linked List. Keeps pointers to the head and tail elements and a
+ * count of the elements. When DOUBLELIST_VALUE is defined the destructor
+ * calls empty(); otherwise the destructor does nothing and the elements are
+ * left alone. The template parameters and the Element type come from macros
+ * set up by the header that includes this one. */
+template <DLMEL_TEMPDEF> class DList
+{
+public:
+ /** \brief Initialize an empty list. */
+ DList() : head(0), tail(0), listLen(0) {}
+
+ /**
+ * \brief Perform a deep copy of the list.
+ *
+ * The elements of the other list are duplicated and put into this list.
+ * Elements are copied using the copy constructor.
+ */
+ DList(const DList &other);
+
+#ifdef DOUBLELIST_VALUE
+ /**
+ * \brief Clear the double list contents.
+ *
+ * All elements are deleted.
+ */
+ ~DList() { empty(); }
+
+ /**
+ * \brief Assign another list into this list using a deep copy.
+ *
+ * The elements of the other list are duplicated and put into this list.
+ * Each list item is created using the copy constructor. If this list
+ * contains any elements before the copy, they are deleted first.
+ *
+ * \returns A reference to this.
+ */
+ DList &operator=(const DList &other);
+
+ /**
+ * \brief Transfer the contents of another list into this list.
+ *
+ * The elements of the other list are moved in. The other list will be
+ * empty afterwards. If this list contains any elements before the copy,
+ * then they are deleted.
+ */
+ void transfer(DList &other);
+#else
+ /**
+ * \brief Abandon all elements in the list.
+ *
+ * List elements are not deleted.
+ */
+ ~DList() {}
+
+ /**
+ * \brief Perform a deep copy of the list.
+ *
+ * The elements of the other list are duplicated and put into this list.
+ * Each list item is created using the copy constructor. If this list
+ * contains any elements before the copy, they are abandoned.
+ *
+ * \returns A reference to this.
+ */
+ DList &operator=(const DList &other);
+
+ /**
+ * \brief Transfer the contents of another list into this list.
+ *
+ * The elements of the other list are moved in. The other list will be
+ * empty afterwards. If this list contains any elements before the copy,
+ * they are abandoned.
+ */
+ void transfer(DList &other);
+#endif
+
+
+#ifdef DOUBLELIST_VALUE
+ /**
+ * \brief Make a new element and prepend it to the front of the list.
+ *
+ * The item is copied into the new element using the copy constructor.
+ * Equivalent to list.addBefore(list.head, item).
+ */
+ void prepend(const T &item);
+
+ /**
+ * \brief Make a new element and append it to the end of the list.
+ *
+ * The item is copied into the new element using the copy constructor.
+ * Equivalent to list.addAfter(list.tail, item).
+ */
+ void append(const T &item);
+
+ /**
+ * \brief Make a new element and insert it immediately after an element in
+ * the list.
+ *
+ * The item is copied into the new element using the copy constructor. If
+ * prev_el is NULL then the new element is prepended to the front of the
+ * list. If prev_el is not already in the list then undefined behaviour
+ * results. Equivalent to list.addAfter(prev_el, new DListValEl(item)).
+ */
+ void addAfter(Element *prev_el, const T &item);
+
+ /**
+ * \brief Make a new element and insert it immediately before an element
+ * in the list.
+ *
+ * The item is copied into the new element using the copy constructor. If
+ * next_el is NULL then the new element is appended to the end of the
+ * list. If next_el is not already in the list then undefined behaviour
+ * results. Equivalent to list.addBefore(next_el, new DListValEl(item)).
+ */
+ void addBefore(Element *next_el, const T &item);
+#endif
+
+ /**
+ * \brief Prepend a single element to the front of the list.
+ *
+ * If new_el is already an element of some list, then undefined behaviour
+ * results. Equivalent to list.addBefore(list.head, new_el).
+ */
+ void prepend(Element *new_el) { addBefore(head, new_el); }
+
+ /**
+ * \brief Append a single element to the end of the list.
+ *
+ * If new_el is already an element of some list, then undefined behaviour
+ * results. Equivalent to list.addAfter(list.tail, new_el).
+ */
+ void append(Element *new_el) { addAfter(tail, new_el); }
+
+ /**
+ * \brief Prepend an entire list to the beginning of this list.
+ *
+ * All items are moved, not copied. Afterwards, the other list is empty.
+ * All items are prepended at once, so this is an O(1) operation.
+ * Equivalent to list.addBefore(list.head, dl).
+ */
+ void prepend(DList &dl) { addBefore(head, dl); }
+
+ /**
+ * \brief Append an entire list to the end of the list.
+ *
+ * All items are moved, not copied. Afterwards, the other list is empty.
+ * All items are appended at once, so this is an O(1) operation.
+ * Equivalent to list.addAfter(list.tail, dl).
+ */
+ void append(DList &dl) { addAfter(tail, dl); }
+
+ /* Insert a single element, or splice in a whole list, relative to an
+ * element of this list. prepend() and append() above pass head and tail
+ * as the position, which are NULL for an empty list. The value-list
+ * overloads document a NULL position as meaning prepend (addAfter) or
+ * append (addBefore); presumably these overloads follow the same rule --
+ * confirm against their definitions. */
+ void addAfter(Element *prev_el, Element *new_el);
+ void addBefore(Element *next_el, Element *new_el);
+
+ void addAfter(Element *prev_el, DList &dl);
+ void addBefore(Element *next_el, DList &dl);
+
+ /**
+ * \brief Detach the head of the list
+ *
+ * The element detached is not deleted. If there is no head of the list
+ * (the list is empty) then undefined behaviour results. Equivalent to
+ * list.detach(list.head).
+ *
+ * \returns The element detached.
+ */
+ Element *detachFirst() { return detach(head); }
+
+ /**
+ * \brief Detach the tail of the list
+ *
+ * The element detached is not deleted. If there is no tail of the list
+ * (the list is empty) then undefined behaviour results. Equivalent to
+ * list.detach(list.tail).
+ *
+ * \returns The element detached.
+ */
+ Element *detachLast() { return detach(tail); }
+
+ /* Detaches an element from the list. Does not free any memory. */
+ Element *detach(Element *el);
+
+ /**
+ * \brief Detach and delete the first element in the list.
+ *
+ * If there is no first element (the list is empty) then undefined
+ * behaviour results. Equivalent to delete list.detach(list.head);
+ */
+ void removeFirst() { delete detach( head ); }
+
+ /**
+ * \brief Detach and delete the last element in the list.
+ *
+ * If there is no last element (the list is empty) then undefined
+ * behaviour results. Equivalent to delete list.detach(list.tail);
+ */
+ void removeLast() { delete detach( tail ); }
+
+ /**
+ * \brief Detach and delete an element from the list.
+ *
+ * If the element is not in the list, then undefined behaviour results.
+ * Equivalent to delete list.detach(el);
+ */
+ void remove(Element *el) { delete detach( el ); }
+
+ /* empty() is what the value-list destructor calls, documented there as
+ * deleting all elements. abandon() is what transfer() calls on the source
+ * list, which transfer() documents as being left empty. Neither is
+ * defined inline here. */
+ void empty();
+ void abandon();
+
+ /** \brief The number of elements in the list. */
+ long length() const { return listLen; }
+
+ /** \brief Head and tail of the linked list. */
+ Element *head, *tail;
+
+ /** \brief The number of elements in the list. */
+ long listLen;
+
+ /* Convenience access. */
+ long size() const { return listLen; }
+
+ /* Forward this so a ref can be used. */
+ struct Iter;
+
+ /* Class for setting the iterator. Each of these only holds a reference to
+ * the list or iterator it was built from; Iter's constructors and
+ * assignment operators read the actual element pointer out of it. */
+ struct IterFirst { IterFirst( const DList &l ) : l(l) { } const DList &l; };
+ struct IterLast { IterLast( const DList &l ) : l(l) { } const DList &l; };
+ struct IterNext { IterNext( const Iter &i ) : i(i) { } const Iter &i; };
+ struct IterPrev { IterPrev( const Iter &i ) : i(i) { } const Iter &i; };
+
+ /**
+ * \brief Double List Iterator.
+ * \ingroup iterators
+ */
+ struct Iter
+ {
+ /* Default construct. */
+ Iter() : ptr(0) { }
+
+ /* Construct from a double list. */
+ Iter( const DList &dl ) : ptr(dl.head) { }
+ Iter( Element *el ) : ptr(el) { }
+ Iter( const IterFirst &dlf ) : ptr(dlf.l.head) { }
+ Iter( const IterLast &dll ) : ptr(dll.l.tail) { }
+ Iter( const IterNext &dln ) : ptr(dln.i.ptr->BASE_EL(next)) { }
+ Iter( const IterPrev &dlp ) : ptr(dlp.i.ptr->BASE_EL(prev)) { }
+
+ /* Assign from a double list. */
+ Iter &operator=( const DList &dl ) { ptr = dl.head; return *this; }
+ Iter &operator=( Element *el ) { ptr = el; return *this; }
+ Iter &operator=( const IterFirst &af ) { ptr = af.l.head; return *this; }
+ Iter &operator=( const IterLast &al ) { ptr = al.l.tail; return *this; }
+ Iter &operator=( const IterNext &an ) { ptr = an.i.ptr->BASE_EL(next); return *this; }
+ Iter &operator=( const IterPrev &ap ) { ptr = ap.i.ptr->BASE_EL(prev); return *this; }
+
+ /** \brief Less than end? */
+ bool lte() const { return ptr != 0; }
+
+ /** \brief At end? */
+ bool end() const { return ptr == 0; }
+
+ /** \brief Greater than beginning? */
+ bool gtb() const { return ptr != 0; }
+
+ /** \brief At beginning? */
+ bool beg() const { return ptr == 0; }
+
+ /** \brief At first element? */
+ bool first() const { return ptr && ptr->BASE_EL(prev) == 0; }
+
+ /** \brief At last element? */
+ bool last() const { return ptr && ptr->BASE_EL(next) == 0; }
+
+ /** \brief Implicit cast to Element*. */
+ operator Element*() const { return ptr; }
+
+ /** \brief Dereference operator returns Element&. */
+ Element &operator *() const { return *ptr; }
+
+ /** \brief Arrow operator returns Element*. */
+ Element *operator->() const { return ptr; }
+
+ /** \brief Move to next item. */
+ inline Element *operator++() { return ptr = ptr->BASE_EL(next); }
+
+ /** \brief Move to next item. */
+ inline Element *increment() { return ptr = ptr->BASE_EL(next); }
+
+ /** \brief Move to next item. */
+ inline Element *operator++(int);
+
+ /** \brief Move to previous item. */
+ inline Element *operator--() { return ptr = ptr->BASE_EL(prev); }
+
+ /** \brief Move to previous item. */
+ inline Element *decrement() { return ptr = ptr->BASE_EL(prev); }
+
+ /** \brief Move to previous item. */
+ inline Element *operator--(int);
+
+ /** \brief Return the next item. Does not modify this. */
+ inline IterNext next() const { return IterNext(*this); }
+
+ /** \brief Return the prev item. Does not modify this. */
+ inline IterPrev prev() const { return IterPrev(*this); }
+
+ /** \brief The iterator is simply a pointer. */
+ Element *ptr;
+ };
+
+ /** \brief Return first element. */
+ IterFirst first() { return IterFirst(*this); }
+
+ /** \brief Return last element. */
+ IterLast last() { return IterLast(*this); }
+};
+
+/* Copy constructor. Starts from an empty list and appends a newly allocated
+ * copy of every element of other, walking other from head to tail. */
+template <DLMEL_TEMPDEF> DList<DLMEL_TEMPUSE>::
+		DList(const DList<DLMEL_TEMPUSE> &other)
+:
+	head(0), tail(0), listLen(0)
+{
+	for ( Element *src = other.head; src != 0; src = src->BASE_EL(next) )
+		append( new Element(*src) );
+}
+
+#ifdef DOUBLELIST_VALUE
+
+/* Assignment operator does deep copy. Assigning a list to itself is a no-op. */
+template <DLMEL_TEMPDEF> DList<DLMEL_TEMPUSE> &DList<DLMEL_TEMPUSE>::
+		operator=(const DList &other)
+{
+	/* Guard against self-assignment: the empty() below would otherwise delete
+	 * every element and leave nothing to copy back. */
+	if ( this != &other ) {
+		/* Free the old list. The value list assumes items were allocated
+		 * on the heap by itself. */
+		empty();
+		for ( Element *el = other.head; el != 0; el = el->BASE_EL(next) )
+			append( new Element(*el) );
+	}
+	return *this;
+}
+
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::
+		transfer(DList &other)
+{
+	/* Self-transfer is a no-op; empty() would otherwise delete the elements. */
+	if ( this == &other )
+		return;
+	/* Free our own items, then take over other's and make other forget them. */
+	empty();
+	head = other.head;
+	tail = other.tail;
+	listLen = other.listLen;
+	other.abandon();
+}
+
+#else
+
+/* Assignment operator does deep copy. Self-assignment is a no-op. */
+template <DLMEL_TEMPDEF> DList<DLMEL_TEMPUSE> &DList<DLMEL_TEMPUSE>::
+		operator=(const DList &other)
+{
+	if ( this != &other ) {
+		abandon();	/* Replace, not extend: old elements are forgotten, never deleted. */
+		for ( Element *el = other.head; el != 0; el = el->BASE_EL(next) )
+			append( new Element(*el) );
+	}
+	return *this;
+}
+
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::
+		transfer(DList &other)
+{
+	if ( this == &other ) return;	/* Self-transfer would abandon our own elements. */
+	head = other.head;
+	tail = other.tail;
+	listLen = other.listLen;
+	other.abandon();	/* other gives the elements up without deleting them. */
+}
+
+#endif
+
+#ifdef DOUBLELIST_VALUE
+
+/* Prepend item: allocate a new Element from it and insert that before head. Kept out of line so callers do not inline the overhead of new. */
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::
+ prepend(const T &item)
+{
+ addBefore(head, new Element(item));
+}
+
+/* Append item: allocate a new Element from it and insert that after tail. Kept out of line so callers do not inline the overhead of new. */
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::
+ append(const T &item)
+{
+ addAfter(tail, new Element(item));
+}
+
+/* Add a new item after a prev element: a new Element is allocated from item
+ * and handed to the element-pointer addAfter. Kept out of line because of the overhead of new. */
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::
+ addAfter(Element *prev_el, const T &item)
+{
+ addAfter(prev_el, new Element(item));
+}
+
+/* Add a new item before a next element: a new Element is allocated from item
+ * and handed to the element-pointer addBefore. Kept out of line because of the overhead of new. */
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::
+ addBefore(Element *next_el, const T &item)
+{
+ addBefore(next_el, new Element(item));
+}
+
+#endif
+
+/*
+ * The larger iterator operators.
+ */
+
+/* Postfix ++: step to the next element, returning the one we started on. */
+template <DLMEL_TEMPDEF> Element *DList<DLMEL_TEMPUSE>::Iter::
+		operator++(int)
+{
+	Element *const was = ptr;
+	ptr = was->BASE_EL(next);
+	return was;
+}
+
+/* Postfix --: step to the previous element, returning the one we started on. */
+template <DLMEL_TEMPDEF> Element *DList<DLMEL_TEMPUSE>::Iter::
+		operator--(int)
+{
+	Element *const was = ptr;
+	ptr = was->BASE_EL(prev);
+	return was;
+}
+
+/**
+ * \brief Insert an element immediately after an element in the list.
+ *
+ * If prev_el is NULL then new_el is prepended to the front of the list. If
+ * prev_el is not in the list or if new_el is already in a list, then
+ * undefined behaviour results. The list length grows by one.
+ */
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::
+ addAfter(Element *prev_el, Element *new_el)
+{
+ /* Set the previous pointer of new_el to prev_el. We do
+ * this regardless of the state of the list. */
+ new_el->BASE_EL(prev) = prev_el;
+
+ /* Set forward pointers. */
+ if (prev_el == 0) {
+ /* There was no prev_el, so new_el becomes the new head. */
+ new_el->BASE_EL(next) = head;
+ head = new_el;
+ }
+ else {
+ /* There was a prev_el: new_el takes over its old successor. */
+ new_el->BASE_EL(next) = prev_el->BASE_EL(next);
+ prev_el->BASE_EL(next) = new_el;
+ }
+
+ /* Set reverse pointers. new_el's next pointer was fixed just above. */
+ if (new_el->BASE_EL(next) == 0) {
+ /* There is no next element. Set the tail pointer. */
+ tail = new_el;
+ }
+ else {
+ /* There is a next element. Set its prev pointer. */
+ new_el->BASE_EL(next)->BASE_EL(prev) = new_el;
+ }
+
+ /* Update list length. */
+ listLen++;
+}
+
+/**
+ * \brief Insert an element immediately before an element in the list.
+ *
+ * If next_el is NULL then new_el is appended to the end of the list. If
+ * next_el is not in the list or if new_el is already in a list, then
+ * undefined behaviour results. The list length grows by one.
+ */
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::
+ addBefore(Element *next_el, Element *new_el)
+{
+ /* Set the next pointer of the new element to next_el. We do
+ * this regardless of the state of the list. */
+ new_el->BASE_EL(next) = next_el;
+
+ /* Set reverse pointers. */
+ if (next_el == 0) {
+ /* There is no next element, so new_el becomes the new tail. */
+ new_el->BASE_EL(prev) = tail;
+ tail = new_el;
+ }
+ else {
+ /* There is a next element: new_el takes over its old predecessor. */
+ new_el->BASE_EL(prev) = next_el->BASE_EL(prev);
+ next_el->BASE_EL(prev) = new_el;
+ }
+
+ /* Set forward pointers. new_el's prev pointer was fixed just above. */
+ if (new_el->BASE_EL(prev) == 0) {
+ /* There is no previous element. Set the head pointer.*/
+ head = new_el;
+ }
+ else {
+ /* There is a previous element, set its next pointer to new_el. */
+ new_el->BASE_EL(prev)->BASE_EL(next) = new_el;
+ }
+
+ /* Update list length. */
+ listLen++;
+}
+
+/**
+ * \brief Insert an entire list immediately after an element in this list.
+ *
+ * Elements are moved, not copied. Afterwards, the other list is empty. If
+ * prev_el is NULL then the elements are prepended to the front of the list.
+ * If prev_el is not in the list then undefined behaviour results. All
+ * elements are inserted into the list at once, so this is an O(1) operation.
+ */
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::
+ addAfter( Element *prev_el, DList<DLMEL_TEMPUSE> &dl )
+{
+ /* Do not bother if dl has no elements. */
+ if ( dl.listLen == 0 )
+ return;
+
+ /* Set the previous pointer of dl.head to prev_el. We do
+ * this regardless of the state of the list. */
+ dl.head->BASE_EL(prev) = prev_el;
+
+ /* Set forward pointers. */
+ if (prev_el == 0) {
+ /* There was no prev_el, so dl.head becomes the new head. */
+ dl.tail->BASE_EL(next) = head;
+ head = dl.head;
+ }
+ else {
+ /* There was a prev_el: dl.tail takes over its old successor. */
+ dl.tail->BASE_EL(next) = prev_el->BASE_EL(next);
+ prev_el->BASE_EL(next) = dl.head;
+ }
+
+ /* Set reverse pointers. dl.tail's next pointer was fixed just above. */
+ if (dl.tail->BASE_EL(next) == 0) {
+ /* There is no next element. Set the tail pointer. */
+ tail = dl.tail;
+ }
+ else {
+ /* There is a next element. Set its prev pointer. */
+ dl.tail->BASE_EL(next)->BASE_EL(prev) = dl.tail;
+ }
+
+ /* Update the list length. */
+ listLen += dl.listLen;
+
+ /* Empty out dl. Its elements now belong to this list; none are deleted. */
+ dl.head = dl.tail = 0;
+ dl.listLen = 0;
+}
+
+/**
+ * \brief Insert an entire list immediately before an element in this list.
+ *
+ * Elements are moved, not copied. Afterwards, the other list is empty. If
+ * next_el is NULL then the elements are appended to the end of the list. If
+ * next_el is not in the list then undefined behaviour results. All elements
+ * are inserted at once, so this is an O(1) operation.
+ */
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::
+ addBefore( Element *next_el, DList<DLMEL_TEMPUSE> &dl )
+{
+ /* Do not bother if dl has no elements. */
+ if ( dl.listLen == 0 )
+ return;
+
+ /* Set the next pointer of dl.tail to next_el. We do
+ * this regardless of the state of the list. */
+ dl.tail->BASE_EL(next) = next_el;
+
+ /* Set reverse pointers. */
+ if (next_el == 0) {
+ /* There is no next element, so dl.tail becomes the new tail. */
+ dl.head->BASE_EL(prev) = tail;
+ tail = dl.tail;
+ }
+ else {
+ /* There is a next element: dl.head takes over its old predecessor. */
+ dl.head->BASE_EL(prev) = next_el->BASE_EL(prev);
+ next_el->BASE_EL(prev) = dl.tail;
+ }
+
+ /* Set forward pointers. dl.head's prev pointer was fixed just above. */
+ if (dl.head->BASE_EL(prev) == 0) {
+ /* There is no previous element. Set the head pointer.*/
+ head = dl.head;
+ }
+ else {
+ /* There is a previous element, set its next pointer to dl.head. */
+ dl.head->BASE_EL(prev)->BASE_EL(next) = dl.head;
+ }
+
+ /* Update list length. */
+ listLen += dl.listLen;
+
+ /* Empty out dl. Its elements now belong to this list; none are deleted. */
+ dl.head = dl.tail = 0;
+ dl.listLen = 0;
+}
+
+
+/**
+ * \brief Detach an element from the list.
+ *
+ * The element is unlinked but not deleted. Its neighbours are joined to each
+ * other. If the element is not in the list, then undefined behaviour results.
+ *
+ * \returns The element detached.
+ */
+template <DLMEL_TEMPDEF> Element *DList<DLMEL_TEMPUSE>::
+		detach(Element *el)
+{
+	/* Capture both neighbours up front; either may be null. */
+	Element *before = el->BASE_EL(prev);
+	Element *after = el->BASE_EL(next);
+
+	/* Forward direction: whatever pointed at el now points at after. */
+	if ( before != 0 )
+		before->BASE_EL(next) = after;
+	else
+		head = after;
+
+	/* Reverse direction: whatever pointed back at el now points at before. */
+	if ( after != 0 )
+		after->BASE_EL(prev) = before;
+	else
+		tail = before;
+
+	/* One fewer element; hand the detached element back to the caller. */
+	listLen--;
+	return el;
+}
+
+/**
+ * \brief Clear the list by deleting all elements.
+ *
+ * Every element is destroyed with delete, then the list is reset to empty.
+ */
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::empty()
+{
+	for ( Element *cur = head; cur != 0; ) {
+		/* Read the link first: cur is gone once it has been deleted. */
+		Element *following = cur->BASE_EL(next);
+		delete cur;
+		cur = following;
+	}
+
+	/* Back to the initial, empty state. */
+	head = tail = 0;
+	listLen = 0;
+}
+
+/**
+ * \brief Clear the list by forgetting all elements.
+ *
+ * Nothing is deleted. Only the list's own bookkeeping is reset to empty.
+ */
+template <DLMEL_TEMPDEF> void DList<DLMEL_TEMPUSE>::abandon()
+{
+	listLen = 0;
+	tail = 0;
+	head = 0;
+}
+
+#ifdef AAPL_NAMESPACE
+}
+#endif
diff --git a/aapl/dlist.h b/aapl/dlist.h
new file mode 100644
index 0000000..eaf3e5d
--- /dev/null
+++ b/aapl/dlist.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_DLIST_H
+#define _AAPL_DLIST_H
+
+#define BASE_EL(name) name
+#define DLMEL_TEMPDEF class Element
+#define DLMEL_TEMPUSE Element
+#define DList DList
+
+/**
+ * \addtogroup dlist
+ * @{
+ */
+
+/**
+ * \class DList
+ * \brief Basic doubly linked list.
+ *
+ * DList is the standard by-structure list type. This class requires the
+ * programmer to declare a list element type that has the necessary next and
+ * previous pointers in it. This can be achieved by inheriting from the
+ * DListEl class or by simply adding next and previous pointers directly into
+ * the list element class.
+ *
+ * DList does not assume ownership of elements in the list. If the elements
+ * are known to reside on the heap, the provided empty() routine can be used to
+ * delete all elements, however the destructor will not call this routine, it
+ * will simply abandon all the elements. It is up to the programmer to
+ * explicitly de-allocate items when necessary.
+ *
+ * \include ex_dlist.cpp
+ */
+
+/*@}*/
+
+#include "dlcommon.h"
+
+#undef BASE_EL
+#undef DLMEL_TEMPDEF
+#undef DLMEL_TEMPUSE
+#undef DList
+
+#endif /* _AAPL_DLIST_H */
+
diff --git a/aapl/dlistmel.h b/aapl/dlistmel.h
new file mode 100644
index 0000000..3433139
--- /dev/null
+++ b/aapl/dlistmel.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_DLISTMEL_H
+#define _AAPL_DLISTMEL_H
+
+/**
+ * \addtogroup dlist
+ * @{
+ */
+
+/**
+ * \class DListMel
+ * \brief Doubly linked list for elements that may appear in multiple lists.
+ *
+ * This class is similar to DList, except that the user defined list element
+ * can inherit from multiple DListEl classes and consequently be an element in
+ * multiple lists. In other words, DListMel allows a single instance of a data
+ * structure to be an element in multiple lists without the lists interfering
+ * with one another.
+ *
+ * For each list that an element class is to appear in, the element must have
+ * unique next and previous pointers that can be unambiguously referred to with
+ * some base class name. This name is given to DListMel as a template argument
+ * so it can use the correct next and previous pointers in its list
+ * operations.
+ *
+ * DListMel does not assume ownership of elements in the list. If the elements
+ * are known to reside on the heap and are not contained in any other list or
+ * data structure, the provided empty() routine can be used to delete all
+ * elements, however the destructor will not call this routine, it will simply
+ * abandon all the elements. It is up to the programmer to explicitly
+ * de-allocate items when it is safe to do so.
+ *
+ * \include ex_dlistmel.cpp
+ */
+
+/*@}*/
+
+#define BASE_EL(name) BaseEl::name
+#define DLMEL_TEMPDEF class Element, class BaseEl
+#define DLMEL_TEMPUSE Element, BaseEl
+#define DList DListMel
+
+#include "dlcommon.h"
+
+#undef BASE_EL
+#undef DLMEL_TEMPDEF
+#undef DLMEL_TEMPUSE
+#undef DList
+
+#endif /* _AAPL_DLISTMEL_H */
+
diff --git a/aapl/dlistval.h b/aapl/dlistval.h
new file mode 100644
index 0000000..6f24999
--- /dev/null
+++ b/aapl/dlistval.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_DLISTVAL_H
+#define _AAPL_DLISTVAL_H
+
+/**
+ * \addtogroup dlist
+ * @{
+ */
+
+/**
+ * \class DListVal
+ * \brief By-value doubly linked list.
+ *
+ * This class is a doubly linked list that does not require a list element
+ * type to be declared. The user instead gives a type that is to be stored in
+ * the list element. When inserting a new data item, the value is copied into
+ * a newly allocated element. This list is intended to behave and be utilized
+ * like the list template found in the STL.
+ *
+ * DListVal is different from the other lists in that it allocates elements
+ * itself. The raw element insert interface is still exposed for convenience,
+ * however, the list assumes all elements in the list are allocated on the
+ * heap and are to be managed by the list. The destructor WILL delete the
+ * contents of the list. If the list is ever copied in from another list, the
+ * existing contents are deleted first. This is in contrast to DList and
+ * DListMel, which will never delete their contents to allow for statically
+ * allocated elements.
+ *
+ * \include ex_dlistval.cpp
+ */
+
+/*@}*/
+
+#define BASE_EL(name) name
+#define DLMEL_TEMPDEF class T
+#define DLMEL_TEMPUSE T
+#define DList DListVal
+#define Element DListValEl<T>
+#define DOUBLELIST_VALUE
+
+#include "dlcommon.h"
+
+#undef BASE_EL
+#undef DLMEL_TEMPDEF
+#undef DLMEL_TEMPUSE
+#undef DList
+#undef Element
+#undef DOUBLELIST_VALUE
+
+#endif /* _AAPL_DLISTVAL_H */
+
diff --git a/aapl/insertsort.h b/aapl/insertsort.h
new file mode 100644
index 0000000..eb3e264
--- /dev/null
+++ b/aapl/insertsort.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_INSERTSORT_H
+#define _AAPL_INSERTSORT_H
+
+#ifdef AAPL_NAMESPACE
+namespace Aapl {
+#endif
+
+/**
+ * \addtogroup sort
+ * @{
+ */
+
+/**
+ * \class InsertSort
+ * \brief Insertion sort an array of data.
+ *
+ * InsertSort can be used to sort any array of objects of type T provided a
+ * compare class is given. InsertSort is in-place. It does not require any
+ * temporary storage.
+ *
+ * Objects are not made aware that they are being moved around in memory.
+ * Assignment operators, constructors and destructors are never invoked by the
+ * sort.
+ *
+ * InsertSort runs in O(n^2) time. It is most useful when sorting small arrays,
+ * where it can outperform the O(n*log(n)) sorters due to its simplicity.
+ * InsertSort is not a stable sort. Elements with the same key will not have
+ * their relative ordering preserved.
+ */
+
+/*@}*/
+
+/* InsertSort. Derives from Compare; sort() orders items with its compare(). */
+template <class T, class Compare> class InsertSort
+ : public Compare
+{
+public:
+ /* Sorting interface routine: sorts the len items starting at data in place. */
+ void sort(T *data, long len);
+};
+
+
+/** \brief Sort an array of data in place (implemented as a selection sort). */
+template <class T, class Compare>
+		void InsertSort<T,Compare>::sort(T *data, long len)
+{
+	/* Fewer than two items are already in order. Bailing out here also keeps
+	 * the data+len-1 bound below from being formed for an empty or null array. */
+	if ( len < 2 )
+		return;
+	for ( T *dest = data; dest < data+len-1; dest++ ) {
+		/* Find the smallest element of the unsorted remainder. */
+		T *smallest = dest;
+		for ( T *src = dest+1; src < data+len; src++ ) {
+			/* this-> is needed: compare() is inherited from a dependent base. */
+			if ( this->compare( *src, *smallest ) < 0 )
+				smallest = src;
+		}
+		if ( smallest != dest ) {
+			/* Swap dest, smallest as raw bytes; no operators of T are run. */
+			char tmp[sizeof(T)];
+			memcpy( tmp, dest, sizeof(T) );
+			memcpy( dest, smallest, sizeof(T) );
+			memcpy( smallest, tmp, sizeof(T) );
+		}
+	}
+}
+
+#ifdef AAPL_NAMESPACE
+}
+#endif
+
+#endif /* _AAPL_INSERTSORT_H */
diff --git a/aapl/mergesort.h b/aapl/mergesort.h
new file mode 100644
index 0000000..d017511
--- /dev/null
+++ b/aapl/mergesort.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright 2001, 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_MERGESORT_H
+#define _AAPL_MERGESORT_H
+
+#include "bubblesort.h"
+
+#ifdef AAPL_NAMESPACE
+namespace Aapl {
+#endif
+
+/**
+ * \addtogroup sort
+ * @{
+ */
+
+/**
+ * \class MergeSort
+ * \brief Merge sort an array of data.
+ *
+ * MergeSort can be used to sort any array of objects of type T provided a
+ * compare class is given. MergeSort is not in-place, it requires temporary
+ * storage equal to the size of the array. The temporary storage is allocated
+ * on the heap.
+ *
+ * Objects are not made aware that they are being moved around in memory.
+ * Assignment operators, constructors and destructors are never invoked by the
+ * sort.
+ *
+ * MergeSort runs in worst case O(n*log(n)) time. In most cases it is slower
+ * than QuickSort because more copying is necessary. But on the other hand,
+ * it is a stable sort, meaning that objects with the same key have their
+ * relative ordering preserved. Also, its worst case is better. MergeSort
+ * switches to a BubbleSort when the size of the array being sorted is small.
+ * This happens when directly sorting a small array or when MergeSort calls
+ * itself recursively on a small portion of a larger array.
+ */
+
+/*@}*/
+
+
+/* MergeSort. Derives from BubbleSort, which doSort() uses for short arrays. */
+template <class T, class Compare> class MergeSort
+ : public BubbleSort<T, Compare>
+{
+public:
+ /* Sorting interface routine: allocates scratch space and runs doSort(). */
+ void sort(T *data, long len);
+
+private:
+ /* Recursive worker. tmpStor is scratch space used by the merge step. */
+ void doSort(T *tmpStor, T *data, long len);
+};
+
+#define _MS_BUBBLE_THRESH 16
+
+/* Recursive mergesort worker. Split data, make recursive calls, merge
+ * results. tmpStor must have room for len items of type T. */
+template< class T, class Compare> void MergeSort<T,Compare>::
+		doSort(T *tmpStor, T *data, long len)
+{
+	if ( len <= 1 )
+		return;
+
+	if ( len <= _MS_BUBBLE_THRESH ) {
+		BubbleSort<T, Compare>::sort( data, len );
+		return;
+	}
+
+	long mid = len / 2;
+	doSort( tmpStor, data, mid );
+	doSort( tmpStor + mid, data + mid, len - mid );
+
+	/* Merge the two sorted halves of data into tmpStor. */
+	T *endLower = data + mid, *lower = data;
+	T *endUpper = data + len, *upper = data + mid;
+	T *dest = tmpStor;
+	while ( true ) {
+		if ( lower == endLower ) {
+			/* Lower half used up. Bulk copy whatever is left of upper. */
+			if ( upper != endUpper )
+				memcpy( dest, upper, (endUpper - upper) * sizeof(T) );
+			break;
+		}
+		else if ( upper == endUpper ) {
+			/* Upper half used up; lower is known to be non-empty here. */
+			memcpy( dest, lower, (endLower - lower) * sizeof(T) );
+			break;
+		}
+		else {
+			/* Both halves have items left. Ties are taken from lower, which
+			 * keeps their relative order in the merge. compare() is inherited
+			 * from a dependent base, so it must be named through this->. */
+			if ( this->compare(*lower, *upper) <= 0 )
+				memcpy( dest++, lower++, sizeof(T) );
+			else
+				memcpy( dest++, upper++, sizeof(T) );
+		}
+	}
+
+	/* Copy back from the tmpStor array. */
+	memcpy( data, tmpStor, sizeof( T ) * len );
+}
+
+/**
+ * \brief Sort an array of data using merge sort.
+ */
+template< class T, class Compare>
+		void MergeSort<T,Compare>::sort(T *data, long len)
+{
+	/* Scratch space for the merge step: raw bytes, room for len items. */
+	char *scratch = new char[sizeof(T) * len];
+	doSort( (T*) scratch, data, len );
+	delete[] scratch;
+}
+
+#ifdef AAPL_NAMESPACE
+}
+#endif
+
+#endif /* _AAPL_MERGESORT_H */
diff --git a/aapl/quicksort.h b/aapl/quicksort.h
new file mode 100644
index 0000000..9bb96ef
--- /dev/null
+++ b/aapl/quicksort.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_QUICKSORT_H
+#define _AAPL_QUICKSORT_H
+
+#include "insertsort.h"
+
+#ifdef AAPL_NAMESPACE
+namespace Aapl {
+#endif
+
+/**
+ * \addtogroup sort
+ * @{
+ */
+
+/**
+ * \class QuickSort
+ * \brief Quick sort an array of data.
+ *
+ * QuickSort can be used to sort any array of objects of type T provided a
+ * compare class is given. QuickSort is in-place. It does not require any
+ * temporary storage.
+ *
+ * Objects are not made aware that they are being moved around in memory.
+ * Assignment operators, constructors and destructors are never invoked by the
+ * sort.
+ *
+ * QuickSort runs in O(n*log(n)) time in the average case. It is faster than
+ * MergeSort in the average case because it does less moving of data. The
+ * performance of quicksort depends mostly on the choice of pivot. This
+ * implementation picks the pivot as the median of first, middle, last. This
+ * choice of pivot avoids the O(n^2) worst case for input already sorted, but
+ * it is still possible to encounter the O(n^2) worst case. For example an
+ * array of identical elements will run in O(n^2).
+ *
+ * QuickSort is not a stable sort. Elements with the same key will not have
+ * their relative ordering preserved. QuickSort switches to an InsertSort
+ * when the size of the array being sorted is small. This happens when
+ * directly sorting a small array or when QuickSort calls itself recursively
+ * on a small portion of a larger array.
+ */
+
+/*@}*/
+
+/* QuickSort. Derives from InsertSort, which doSort() uses for short ranges. */
+template <class T, class Compare> class QuickSort :
+ public InsertSort<T, Compare>
+{
+public:
+ /* Sorting interface routine: sorts the len items starting at data in place. */
+ void sort(T *data, long len);
+
+private:
+ /* Recursive worker and its helpers. start and end are both inclusive. */
+ void doSort(T *start, T *end);
+ T *partition(T *start, T *end);
+ inline T *median(T *start, T *end);
+};
+
+#define _QS_INSERTION_THRESH 16
+
+/* Finds the median of start, middle, end. Returns a pointer to that element. */
+template <class T, class Compare> T *QuickSort<T,Compare>::
+		median(T *start, T *end)
+{
+	T *pivot, *mid = start + (end-start)/2;
+	/* Choose the pivot. compare() is expected to come from a dependent base
+	 * class, so it has to be named through this-> to be found. */
+	if ( this->compare(*start, *mid) < 0 ) {
+		if ( this->compare(*mid, *end) < 0 )
+			pivot = mid;
+		else if ( this->compare(*start, *end) < 0 )
+			pivot = end;
+		else
+			pivot = start;
+	}
+	else if ( this->compare(*start, *end) < 0 )
+		pivot = start;
+	else if ( this->compare(*mid, *end) < 0 )
+		pivot = end;
+	else
+		pivot = mid;
+
+	return pivot;
+}
+
+template <class T, class Compare> T *QuickSort<T,Compare>::
+		partition(T *start, T *end)
+{
+	/* Use the median of start, middle, end as the pivot. First save
+	 * it off then move the last element to the free spot. */
+	char pcPivot[sizeof(T)];
+	T *pivot = median(start, end);
+
+	memcpy( pcPivot, pivot, sizeof(T) );
+	if ( pivot != end )
+		memcpy( pivot, end, sizeof(T) );
+
+	T *first = start-1;
+	T *last = end;
+	pivot = (T*) pcPivot;	/* NOTE(review): a char buffer promises no alignment for T. */
+
+	/* Shuffle elements to the correct side of the pivot, ending up with the
+	 * free spot where the pivot will go. compare() needs this-> (dependent base). */
+	while ( true ) {
+		/* Throw one element ahead to the free spot at last. */
+		while ( true ) {
+			first += 1;
+			if ( first == last )
+				goto done;
+			if ( this->compare( *first, *pivot ) > 0 ) {
+				memcpy(last, first, sizeof(T));
+				break;
+			}
+		}
+
+		/* Throw one element back to the free spot at first. */
+		while ( true ) {
+			last -= 1;
+			if ( last == first )
+				goto done;
+			if ( this->compare( *last, *pivot ) < 0 ) {
+				memcpy(first, last, sizeof(T));
+				break;
+			}
+		}
+	}
+done:
+	/* Put the pivot into the middle spot for it and report where it landed. */
+	memcpy( first, pivot, sizeof(T) );
+	return first;
+}
+
+
+template< class T, class Compare> void QuickSort<T,Compare>::
+		doSort(T *start, T *end)
+{
+	const long count = end - start + 1;
+	if ( count <= 1 )
+		return;
+	if ( count <= _QS_INSERTION_THRESH ) {
+		/* Short range: hand it to the simple sorter instead of recursing. */
+		InsertSort<T, Compare>::sort( start, count );
+		return;
+	}
+	T *pivot = partition( start, end );	/* pivot now sits in its final slot */
+	doSort(start, pivot-1);
+	doSort(pivot+1, end);
+}
+
+/** \brief Sort an array of data using quick sort. */
+template< class T, class Compare>
+		void QuickSort<T,Compare>::sort(T *data, long len)
+{
+	/* The recursive worker takes an inclusive range, so pass the address
+	 * of the final element rather than one past the end. */
+	T *const lastEl = data + len - 1;
+	doSort( data, lastEl );
+}
+
+#ifdef AAPL_NAMESPACE
+}
+#endif
+
+#endif /* _AAPL_QUICKSORT_H */
diff --git a/aapl/resize.h b/aapl/resize.h
new file mode 100644
index 0000000..24edc16
--- /dev/null
+++ b/aapl/resize.h
@@ -0,0 +1,344 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_RESIZE_H
+#define _AAPL_RESIZE_H
+
+#include <assert.h>
+
+#ifdef AAPL_NAMESPACE
+namespace Aapl {
+#endif
+
+/* This step is expressed in units of T. Changing this requires changes to
+ * docs in ResizeLin constructor. */
+#define LIN_DEFAULT_STEP 256
+
+/*
+ * Resizing macros giving different resize methods.
+ */
+
+/* If needed is greater than existing, give twice needed. Args may be evaluated twice. */
+#define EXPN_UP( existing, needed ) \
+	( (needed) > (existing) ? ((needed)<<1) : (existing) )
+
+/* If needed is less than 1 quarter existing, give twice needed, else existing. */
+#define EXPN_DOWN( existing, needed ) \
+	( (needed) < ((existing)>>2) ? ((needed)<<1) : (existing) )
+
+/* If needed is greater than existing, give needed plus step (step comes from the expansion scope). */
+#define LIN_UP( existing, needed ) \
+	( (needed) > (existing) ? ((needed)+step) : (existing) )
+
+/* If needed is less than existing - 2 * step then give needed plus step, else existing. */
+#define LIN_DOWN( existing, needed ) \
+	( (needed) < ((existing)-(step<<1)) ? ((needed)+step) : (existing) )
+
+/* Return existing; needed is ignored. */
+#define CONST_UP( existing, needed ) (existing)
+
+/* Return existing; needed is ignored. */
+#define CONST_DOWN( existing, needed ) (existing)
+
+/**
+ * \addtogroup vector
+ * @{
+ */
+
+/** \class ResizeLin
+ * \brief Linear table resizer.
+ *
+ * When an up resize or a down resize is needed, ResizeLin allocates the space
+ * needed plus some user defined step. The result is that when growing the
+ * vector in a linear fashion, the number of resizes is also linear.
+ *
+ * If only up resizing is done, then there will never be more than step unused
+ * spaces in the vector. If down resizing is done as well, there will never be
+ * more than 2*step unused spaces in the vector. The up resizing and down
+ * resizing policies are offset to improve performance when repeatedly
+ * inserting and removing a small number of elements relative to the step.
+ * This scheme guarantees that repetitive inserting and removing of a small
+ * number of elements will never result in repetitive reallocation.
+ *
+ * The vectors pass sizes to the resizer in units of T, so the step gets
+ * interpreted as units of T.
+ */
+
+/*@}*/
+
+/* Linear resizing with a step that can be changed at run time. */
+class ResizeLin
+{
+protected:
+ /**
+ * \brief Default constructor.
+ *
+ * Initializes the resize step to LIN_DEFAULT_STEP (256) units of T.
+ */
+ ResizeLin() : step(LIN_DEFAULT_STEP) { }
+
+ /**
+ * \brief Determine the new table size when up resizing.
+ *
+ * If the existing size is insufficient for the space needed, then allocate
+ * the space needed plus the step. Otherwise keep the existing size.
+ */
+ inline long upResize( long existing, long needed )
+ { return LIN_UP(existing, needed); }
+
+ /**
+ * \brief Determine the new table size when down resizing.
+ *
+ * If the space needed is less than existing - 2*step, then allocate the
+ * space needed plus the step. Otherwise keep the existing size.
+ */
+ inline long downResize( long existing, long needed )
+ { return LIN_DOWN(existing, needed); }
+
+public:
+ /**
+ * \brief Step for linear resize.
+ *
+ * Amount of extra space in units of T added each time a resize must take
+ * place. This may be changed at any time. The step should be >= 0.
+ */
+ long step;
+};
+
+/**
+ * \addtogroup vector
+ * @{
+ */
+
+/** \class ResizeCtLin
+ * \brief Linear table resizer with compile time step.
+ *
+ * When an up resize or a down resize is needed, ResizeCtLin allocates the
+ * space needed plus some compile time defined step. The result is that when
+ * growing the vector in a linear fashion, the number of resizes is also
+ * linear.
+ *
+ * If only up resizing is done, then there will never be more than step unused
+ * spaces in the vector. If down resizing is done as well, there will never be
+ * more than 2*step unused spaces in the vector. The up resizing and down
+ * resizing policies are offset to improve performance when repeatedly
+ * inserting and removing a small number of elements relative to the step.
+ * This scheme guarantees that repetitive inserting and removing of a small
+ * number of elements will never result in repetitive reallocation.
+ *
+ * The vectors pass sizes to the resizer in units of T, so the step gets
+ * interpreted as units of T.
+ */
+
+/*@}*/
+
+/* Linear resizing with the step fixed as a template argument. */
+template <long step> class ResizeCtLin
+{
+protected:
+ /**
+ * \brief Determine the new table size when up resizing.
+ *
+ * If the existing size is insufficient for the space needed, then allocate
+ * the space needed plus the step. Otherwise keep the existing size.
+ */
+ inline long upResize( long existing, long needed )
+ { return LIN_UP(existing, needed); }
+
+ /**
+ * \brief Determine the new table size when down resizing.
+ *
+ * If the space needed is less than existing - 2*step, then allocate the
+ * space needed plus the step. Otherwise keep the existing size.
+ */
+ inline long downResize( long existing, long needed )
+ { return LIN_DOWN(existing, needed); }
+};
+
+/**
+ * \addtogroup vector
+ * @{
+ */
+
+/** \class ResizeConst
+ * \brief Constant table resizer.
+ *
+ * When an up resize is needed the existing size is always used. ResizeConst
+ * does not allow dynamic resizing. To use ResizeConst, the vector needs to be
+ * constructed with an initial allocation amount, otherwise it will be
+ * unusable.
+ */
+
+/*@}*/
+
+/* Constant table resizing: both operations hand back the existing size. */
+class ResizeConst
+{
+protected:
+ /* Asserts that needed does not exceed existing, then returns existing. Defined below. */
+ static inline long upResize( long existing, long needed );
+
+ /**
+ * \brief Determine the new table size when down resizing.
+ *
+ * Always returns the existing table size; needed is ignored.
+ */
+ static inline long downResize( long existing, long needed )
+ { return CONST_DOWN(existing, needed); }
+};
+
+/**
+ * \brief Determine the new table size when up resizing.
+ *
+ * If the existing size is insufficient for the space needed, then the
+ * assertion fails. Otherwise (or with NDEBUG) returns the existing size.
+ */
+inline long ResizeConst::upResize( long existing, long needed )
+{
+ assert( needed <= existing ); /* checked only when NDEBUG is not defined */
+ return CONST_UP(existing, needed);
+}
+
+/**
+ * \addtogroup vector
+ * @{
+ */
+
+/** \class ResizeRunTime
+ * \brief Run time settable table resizer.
+ *
+ * ResizeRunTime can have its up and down resizing policies set at run time.
+ * Both up and down policies can be set independently to one of Exponential,
+ * Linear, or Constant. See the documentation for ResizeExpn, ResizeLin, and
+ * ResizeConst for the details of the resizing policies.
+ *
+ * The policies may be changed at any time. The default policies are
+ * both Exponential.
+ */
+
+/*@}*/
+
+/* Run time resizing: the up and down policies are data members. */
+class ResizeRunTime
+{
+protected:
+ /**
+ * \brief Default constructor.
+ *
+ * The up and down resizing are initialized to Exponential. The step
+ * defaults to 256 units of T.
+ */
+ inline ResizeRunTime();
+
+ /**
+ * \brief Resizing policies. NOTE(review): protected, yet the public members use it.
+ */
+ enum ResizeType {
+ Exponential, /*!< Exponential resizing. */
+ Linear, /*!< Linear resizing. */
+ Constant /*!< Constant table size. */
+ };
+
+ inline long upResize( long existing, long needed ); /* rule chosen by upResizeType */
+ inline long downResize( long existing, long needed ); /* rule chosen by downResizeType */
+
+public:
+ /**
+ * \brief Step for linear resize.
+ *
+ * Amount of extra space in units of T added each time a Linear resize takes
+ * place. This may be changed at any time. The step should be >= 0.
+ */
+ long step;
+
+ /**
+ * \brief Up resizing policy.
+ */
+ ResizeType upResizeType;
+
+ /**
+ * \brief Down resizing policy.
+ */
+ ResizeType downResizeType;
+};
+
+inline ResizeRunTime::ResizeRunTime() /* defaults: default step, exponential both ways */
+:
+ step( LIN_DEFAULT_STEP ), /* 256 units of T; read only by the Linear rules */
+ upResizeType( Exponential ),
+ downResizeType( Exponential )
+{
+}
+
+/**
+ * \brief Determine the new table size when up resizing.
+ *
+ * upResizeType selects the rule: Exponential, Linear and Constant behave as in
+ * ResizeExpn, ResizeLin and ResizeConst. The Constant rule asserts that needed
+ * fits in existing. Returns 0 if upResizeType is not one of the three values.
+ */
+inline long ResizeRunTime::upResize( long existing, long needed )
+{
+	switch ( upResizeType ) {
+	case Exponential:
+		return EXPN_UP(existing, needed);
+	case Linear:
+		return LIN_UP(existing, needed);
+	case Constant:
+		assert( needed <= existing );
+		return CONST_UP(existing, needed);
+	}
+	return 0;
+}
+
+/**
+ * \brief Determine the new table size when down resizing.
+ *
+ * downResizeType selects the rule: Exponential, Linear and Constant behave as
+ * in ResizeExpn, ResizeLin and ResizeConst. Returns 0 if downResizeType is not
+ * one of the three values.
+ */
+inline long ResizeRunTime::downResize( long existing, long needed )
+{
+	/* Dispatch on the policy currently stored in downResizeType. */
+	if ( downResizeType == Exponential )
+		return EXPN_DOWN(existing, needed);
+	if ( downResizeType == Linear )
+		return LIN_DOWN(existing, needed);
+	if ( downResizeType == Constant )
+		return CONST_DOWN(existing, needed);
+	/* Not one of the known policies. */
+	return 0;
+}
+
+/* Don't need these anymore. */
+#undef EXPN_UP
+#undef EXPN_DOWN
+#undef LIN_UP
+#undef LIN_DOWN
+#undef CONST_UP
+#undef CONST_DOWN
+
+#ifdef AAPL_NAMESPACE
+}
+#endif
+
+#endif /* _AAPL_RESIZE_H */
diff --git a/aapl/sbstmap.h b/aapl/sbstmap.h
new file mode 100644
index 0000000..9436a47
--- /dev/null
+++ b/aapl/sbstmap.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_SBSTMAP_H
+#define _AAPL_SBSTMAP_H
+
+#include "compare.h"
+#include "svector.h"
+
+#ifdef AAPL_NAMESPACE
+namespace Aapl {
+#endif
+
+/**
+ * \brief Element for SBstMap.
+ *
+ * Stores the key and value pair.
+ */
+template <class Key, class Value> struct SBstMapEl
+{
+ SBstMapEl() {} /* key and value are default-initialized */
+ SBstMapEl(const Key &key) : key(key) {} /* value is default-initialized */
+ SBstMapEl(const Key &key, const Value &val) : key(key), value(val) {}
+
+ /** \brief The key. */
+ Key key;
+
+ /** \brief The value. */
+ Value value;
+};
+
+#ifdef AAPL_NAMESPACE
+}
+#endif
+
+/**
+ * \addtogroup bst
+ * @{
+ */
+
+/**
+ * \class SBstMap
+ * \brief Copy-on-write binary search table for key and value pairs.
+ *
+ * This is a map style binary search table that employs the copy-on-write
+ * mechanism for table data. SBstMap stores key and value pairs in each
+ * element. The key and value can be any type. A compare class for the key
+ * must be supplied.
+ */
+
+/*@}*/
+
+#define BST_TEMPL_DECLARE class Key, class Value, \
+ class Compare = CmpOrd<Key>, class Resize = ResizeExpn
+#define BST_TEMPL_DEF class Key, class Value, class Compare, class Resize
+#define BST_TEMPL_USE Key, Value, Compare, Resize
+#define GET_KEY(el) ((el).key)
+#define BstTable SBstMap
+#define Vector SVector
+#define Table STable
+#define Element SBstMapEl<Key, Value>
+#define BSTMAP
+#define SHARED_BST
+
+#include "bstcommon.h"
+
+#undef BST_TEMPL_DECLARE
+#undef BST_TEMPL_DEF
+#undef BST_TEMPL_USE
+#undef GET_KEY
+#undef BstTable
+#undef Vector
+#undef Table
+#undef Element
+#undef BSTMAP
+#undef SHARED_BST
+
+/**
+ * \fn SBstMap::insert(const Key &key, SBstMapEl<Key, Value> **lastFound)
+ * \brief Insert the given key.
+ *
+ * If the given key does not already exist in the table then a new element
+ * having key is inserted. The key copy constructor and value default
+ * constructor are used to place the pair in the table. If lastFound is given,
+ * it is set to the new entry created. If the insert fails then lastFound is
+ * set to the existing pair of the same key.
+ *
+ * \returns The new element created upon success, null upon failure.
+ */
+
+/**
+ * \fn SBstMap::insertMulti(const Key &key)
+ * \brief Insert the given key even if it exists already.
+ *
+ * If the key exists already then the new element having key is placed next
+ * to some other pair of the same key. InsertMulti cannot fail. The key copy
+ * constructor and the value default constructor are used to place the pair in
+ * the table.
+ *
+ * \returns The new element created.
+ */
+
+#endif /* _AAPL_SBSTMAP_H */
diff --git a/aapl/sbstset.h b/aapl/sbstset.h
new file mode 100644
index 0000000..fe8ddf6
--- /dev/null
+++ b/aapl/sbstset.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_SBSTSET_H
+#define _AAPL_SBSTSET_H
+
+/**
+ * \addtogroup bst
+ * @{
+ */
+
+/**
+ * \class SBstSet
+ * \brief Copy-on-write binary search table for types that are the key.
+ *
+ * This is a set style binary search table that employs the copy-on-write
+ * mechanism for storing table data. SBstSet is suitable for types that
+ * comprise the entire key. Rather than look into the element to retrieve the
+ * key, the element is the key. A class that contains a comparison routine
+ * for the key must be given.
+ */
+
+/*@}*/
+
+#include "compare.h"
+#include "svector.h"
+
+#define BST_TEMPL_DECLARE class Key, class Compare = CmpOrd<Key>, \
+ class Resize = ResizeExpn
+#define BST_TEMPL_DEF class Key, class Compare, class Resize
+#define BST_TEMPL_USE Key, Compare, Resize
+#define GET_KEY(el) (el)
+#define BstTable SBstSet
+#define Vector SVector
+#define Table STable
+#define Element Key
+#define BSTSET
+#define SHARED_BST
+
+#include "bstcommon.h"
+
+#undef BST_TEMPL_DECLARE
+#undef BST_TEMPL_DEF
+#undef BST_TEMPL_USE
+#undef GET_KEY
+#undef BstTable
+#undef Vector
+#undef Table
+#undef Element
+#undef BSTSET
+#undef SHARED_BST
+
+/**
+ * \fn SBstSet::insert(const Key &key, Key **lastFound)
+ * \brief Insert the given key.
+ *
+ * If the given key does not already exist in the table then it is inserted.
+ * The key's copy constructor is used to place the item in the table. If
+ * lastFound is given, it is set to the new entry created. If the insert fails
+ * then lastFound is set to the existing key of the same value.
+ *
+ * \returns The new element created upon success, null upon failure.
+ */
+
+/**
+ * \fn SBstSet::insertMulti(const Key &key)
+ * \brief Insert the given key even if it exists already.
+ *
+ * If the key exists already then it is placed next to some other key of the
+ * same value. InsertMulti cannot fail. The key's copy constructor is used to
+ * place the item in the table.
+ *
+ * \returns The new element created.
+ */
+
+#endif /* _AAPL_SBSTSET_H */
diff --git a/aapl/sbsttable.h b/aapl/sbsttable.h
new file mode 100644
index 0000000..100b87e
--- /dev/null
+++ b/aapl/sbsttable.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_SBSTTABLE_H
+#define _AAPL_SBSTTABLE_H
+
+#include "compare.h"
+#include "svector.h"
+
+/**
+ * \addtogroup bst
+ * @{
+ */
+
+/**
+ * \class SBstTable
+ * \brief Copy-on-write binary search table for structures that contain a key.
+ *
+ * This is a basic binary search table that employs a copy-on-write data
+ * storage mechanism. It can be used to contain a structure that has a key and
+ * possibly some data. The key should be a member of the element class and
+ * accessible with getKey(). A class containing the compare routine must be
+ * supplied.
+ */
+
+/*@}*/
+
+#define BST_TEMPL_DECLARE class Element, class Key, \
+ class Compare = CmpOrd<Key>, class Resize = ResizeExpn
+#define BST_TEMPL_DEF class Element, class Key, class Compare, class Resize
+#define BST_TEMPL_USE Element, Key, Compare, Resize
+#define GET_KEY(el) ((el).getKey())
+#define BstTable SBstTable
+#define Vector SVector
+#define Table STable
+#define BSTTABLE
+#define SHARED_BST
+
+#include "bstcommon.h"
+
+#undef BST_TEMPL_DECLARE
+#undef BST_TEMPL_DEF
+#undef BST_TEMPL_USE
+#undef GET_KEY
+#undef BstTable
+#undef Vector
+#undef Table
+#undef BSTTABLE
+#undef SHARED_BST
+
+/**
+ * \fn SBstTable::insert(const Key &key, Element **lastFound)
+ * \brief Insert a new element with the given key.
+ *
+ * If the given key does not already exist in the table a new element is
+ * inserted with the given key. A constructor taking only const Key& is used
+ * to initialize the new element. If lastFound is given, it is set to the new
+ * element created. If the insert fails then lastFound is set to the existing
+ * element with the same key.
+ *
+ * \returns The new element created upon success, null upon failure.
+ */
+
+/**
+ * \fn SBstTable::insertMulti(const Key &key)
+ * \brief Insert a new element even if the key exists already.
+ *
+ * If the key exists already then the new element is placed next to some
+ * element with the same key. InsertMulti cannot fail. A constructor taking
+ * only const Key& is used to initialize the new element.
+ *
+ * \returns The new element created.
+ */
+
+#endif /* _AAPL_SBSTTABLE_H */
diff --git a/aapl/svector.h b/aapl/svector.h
new file mode 100644
index 0000000..ff9e97c
--- /dev/null
+++ b/aapl/svector.h
@@ -0,0 +1,1426 @@
+/*
+ * Copyright 2002, 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_SVECTOR_H
+#define _AAPL_SVECTOR_H
+
+#include <new>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "table.h"
+
+#ifdef AAPL_NAMESPACE
+namespace Aapl {
+#endif
+
+/**
+ * \addtogroup vector
+ * @{
+ */
+
+/** \class SVector
+ * \brief Copy-on-write dynamic array.
+ *
+ * SVector is a variant of Vector that employs copy-on-write behaviour. The
+ * SVector copy constructor and = operator make shallow copies. If a vector
+ * that references shared data is modified with insert, replace, append,
+ * prepend, setAs or remove, a new copy is made so as not to interfere with
+ * the shared data. However, shared individual elements may be modified by
+ * bypassing the SVector interface.
+ *
+ * SVector is a dynamic array that can be used to contain complex data
+ * structures that have constructors and destructors as well as simple types
+ * such as integers and pointers.
+ *
+ * SVector supports inserting, overwriting, and removing single or multiple
+ * elements at once. Constructors and destructors are called wherever
+ * appropriate. For example, before an element is overwritten, its
+ * destructor is called.
+ *
+ * SVector provides automatic resizing of allocated memory as needed and
+ * offers different allocation schemes for controlling how the automatic
+ * allocation is done. Two senses of the length of the data are
+ * maintained: the amount of raw memory allocated to the vector and the number
+ * of actual elements in the vector. The various allocation schemes control
+ * how the allocated space is changed in relation to the number of elements in
+ * the vector.
+ */
+
+/*@}*/
+
+/* SVector */
+template < class T, class Resize = ResizeExpn > class SVector :
+ public STable<T>, public Resize
+{
+private:
+ typedef STable<T> BaseTable;
+
+public:
+ /**
+ * \brief Initialize an empty vector with no space allocated.
+ *
+ * If a linear resizer is used, the step defaults to 256 units of T. For a
+ * runtime vector both up and down allocation schemes default to
+ * Exponential.
+ */
+ SVector() { }
+
+ /**
+ * \brief Create a vector that contains an initial element.
+ *
+ * The vector becomes one element in length. The element's copy
+ * constructor is used to place the value in the vector.
+ */
+ SVector(const T &val) { setAs(&val, 1); }
+
+ /**
+ * \brief Create a vector that contains an array of elements.
+ *
+ * The vector becomes len elements in length. Copy constructors are used
+ * to place the new elements in the vector.
+ */
+ SVector(const T *val, long len) { setAs(val, len); }
+
+ /* Shallow copy. */
+ SVector( const SVector &v );
+
+ /* Shallow copy. */
+ SVector(STabHead *head);
+
+ /**
+ * \brief Free all memory used by the vector.
+ *
+ * The vector is reset to zero elements. Destructors are called on all
+ * elements in the vector. The space allocated for the vector is freed.
+ */
+ ~SVector() { empty(); }
+
+ /* Delete all items. */
+ void empty();
+
+ /**
+ * \brief Deep copy another vector into this vector.
+ *
+ * Copies the entire contents of the other vector into this vector. Any
+ * existing contents are first deleted. Equivalent to setAs.
+ */
+ void deepCopy( const SVector &v ) { setAs(v.data, v.length()); }
+
+ /* Perform a shallow copy of another vector. */
+ SVector &operator=( const SVector &v );
+
+ /* Perform a shallow copy of another vector by the header. */
+ SVector &operator=( STabHead *head );
+
+
+ /*@{*/
+ /**
+ * \brief Insert one element at position pos.
+ *
+ * Elements in the vector from pos onward are shifted one space to the
+ * right. The copy constructor is used to place the element into this
+ * vector. If pos is greater than the length of the vector then undefined
+ * behaviour results. If pos is negative then it is treated as an offset
+ * relative to the length of the vector.
+ */
+ void insert(long pos, const T &val) { insert(pos, &val, 1); }
+
+ /* Insert an array of values. */
+ void insert(long pos, const T *val, long len);
+
+ /**
+ * \brief Insert all the elements from another vector at position pos.
+ *
+ * Elements in this vector from pos onward are shifted v.length() spaces
+ * to the right. The element's copy constructor is used to copy the items
+ * into this vector. The other vector is left unchanged. If pos is off the
+ * end of the vector, then undefined behaviour results. If pos is negative
+ * then it is treated as an offset relative to the length of the vector.
+ * Equivalent to vector.insert(pos, other.data, other.length()).
+ */
+ void insert(long pos, const SVector &v) { insert(pos, v.data, v.length()); }
+
+ /* Insert len copies of val into the vector. */
+ void insertDup(long pos, const T &val, long len);
+
+ /**
+ * \brief Insert one new element using the default constructor.
+ *
+ * Elements in the vector from pos onward are shifted one space to the right.
+ * The default constructor is used to init the new element. If pos is greater
+ * than the length of the vector then undefined behaviour results. If pos is
+ * negative then it is treated as an offset relative to the length of the
+ * vector.
+ */
+ void insertNew(long pos) { insertNew(pos, 1); }
+
+ /* Insert len new items using default constructor. */
+ void insertNew(long pos, long len);
+ /*@}*/
+
+ /*@{*/
+ /**
+ * \brief Remove one element at position pos.
+ *
+ * The element's destructor is called. Elements to the right of pos are
+ * shifted one space to the left to take up the free space. If pos is greater
+ * than or equal to the length of the vector then undefined behavior results.
+ * If pos is negative then it is treated as an offset relative to the length
+ * of the vector.
+ */
+ void remove(long pos) { remove(pos, 1); }
+
+ /* Delete a number of elements. */
+ void remove(long pos, long len);
+ /*@}*/
+
+ /*@{*/
+ /**
+ * \brief Replace one element at position pos.
+ *
+ * If there is an existing element at position pos (if pos is less than the
+ * length of the vector) then its destructor is called before the space is
+ * used. The copy constructor is used to place the element into the vector.
+ * If pos is greater than the length of the vector then undefined behaviour
+ * results. If pos is negative then it is treated as an offset relative to
+ * the length of the vector.
+ */
+ void replace(long pos, const T &val) { replace(pos, &val, 1); }
+
+ /* Replace with an array of values. */
+ void replace(long pos, const T *val, long len);
+
+ /**
+ * \brief Replace at position pos with all the elements of another vector.
+ *
+ * Replace at position pos with all the elements of another vector. The other
+ * vector is left unchanged. If there are existing elements at the positions
+ * to be replaced, then destructors are called before the space is used. Copy
+ * constructors are used to place the elements into this vector. It is
+ * allowable for the pos and length of the other vector to specify a
+ * replacement that overwrites existing elements and creates new ones. If pos
+ * is greater than the length of the vector then undefined behaviour results.
+ * If pos is negative, then it is treated as an offset relative to the length
+ * of the vector.
+ */
+ void replace(long pos, const SVector &v) { replace(pos, v.data, v.length()); }
+
+ /* Replace len items with len copies of val. */
+ void replaceDup(long pos, const T &val, long len);
+
+ /**
+ * \brief Replace at position pos with one new element.
+ *
+ * If there is an existing element at the position to be replaced (pos is
+ * less than the length of the vector) then the element's destructor is
+ * called before the space is used. The default constructor is used to
+ * initialize the new element. If pos is greater than the length of the
+ * vector then undefined behaviour results. If pos is negative, then it is
+ * treated as an offset relative to the length of the vector.
+ */
+ void replaceNew(long pos) { replaceNew(pos, 1); }
+
+ /* Replace len items at pos with newly constructed objects. */
+ void replaceNew(long pos, long len);
+ /*@}*/
+
+ /*@{*/
+
+ /**
+ * \brief Set the contents of the vector to be val exactly.
+ *
+ * The vector becomes one element in length. Destructors are called on any
+ * existing elements in the vector. The element's copy constructor is used to
+ * place the val in the vector.
+ */
+ void setAs(const T &val) { setAs(&val, 1); }
+
+ /* Set to the contents of an array. */
+ void setAs(const T *val, long len);
+
+ /**
+ * \brief Set the vector to exactly the contents of another vector.
+ *
+ * The vector becomes v.length() elements in length. Destructors are called
+ * on any existing elements. Copy constructors are used to place the new
+ * elements in the vector.
+ */
+ void setAs(const SVector &v) { setAs(v.data, v.length()); }
+
+ /* Set as len copies of item. */
+ void setAsDup(const T &item, long len);
+
+ /**
+ * \brief Set the vector to exactly one new item.
+ *
+ * The vector becomes one element in length. Destructors are called on any
+ * existing elements in the vector. The default constructor is used to
+ * init the new item.
+ */
+ void setAsNew() { setAsNew(1); }
+
+ /* Set as newly constructed objects using the default constructor. */
+ void setAsNew(long len);
+ /*@}*/
+
+ /*@{*/
+ /**
+ * \brief Append one element to the end of the vector.
+ *
+ * Copy constructor is used to place the element in the vector.
+ */
+ void append(const T &val) { replace(BaseTable::length(), &val, 1); }
+
+ /**
+ * \brief Append len elements to the end of the vector.
+ *
+ * Copy constructors are used to place the elements in the vector.
+ */
+ void append(const T *val, long len) { replace(BaseTable::length(), val, len); }
+
+ /**
+ * \brief Append the contents of another vector.
+ *
+ * The other vector is left unchanged. Copy constructors are used to place
+ * the elements in the vector.
+ */
+ void append(const SVector &v)
+ { replace(BaseTable::length(), v.data, v.length()); }
+
+ /**
+ * \brief Append len copies of item.
+ *
+ * The copy constructor is used to place the item in the vector.
+ */
+ void appendDup(const T &item, long len) { replaceDup(BaseTable::length(), item, len); }
+
+ /**
+ * \brief Append a single newly created item.
+ *
+ * The new element is initialized with the default constructor.
+ */
+ void appendNew() { replaceNew(BaseTable::length(), 1); }
+
+ /**
+ * \brief Append len newly created items.
+ *
+ * The new elements are initialized with the default constructor.
+ */
+ void appendNew(long len) { replaceNew(BaseTable::length(), len); }
+ /*@}*/
+
+
+ /*@{*/
+ /**
+ * \brief Prepend one element to the front of the vector.
+ *
+ * Copy constructor is used to place the element in the vector.
+ */
+ void prepend(const T &val) { insert(0, &val, 1); }
+
+ /**
+ * \brief Prepend len elements to the front of the vector.
+ *
+ * Copy constructors are used to place the elements in the vector.
+ */
+ void prepend(const T *val, long len) { insert(0, val, len); }
+
+ /**
+ * \brief Prepend the contents of another vector.
+ *
+ * The other vector is left unchanged. Copy constructors are used to place
+ * the elements in the vector.
+ */
+ void prepend(const SVector &v) { insert(0, v.data, v.length()); }
+
+ /**
+ * \brief Prepend len copies of item.
+ *
+ * The copy constructor is used to place the item in the vector.
+ */
+ void prependDup(const T &item, long len) { insertDup(0, item, len); }
+
+ /**
+ * \brief Prepend a single newly created item.
+ *
+ * The new element is initialized with the default constructor.
+ */
+ void prependNew() { insertNew(0, 1); }
+
+ /**
+ * \brief Prepend len newly created items.
+ *
+ * The new elements are initialized with the default constructor.
+ */
+ void prependNew(long len)
+ {
+ /* len default-constructed elements go in at the front. */
+ insertNew( 0, len );
+ }
+ /*@}*/
+
+ /* Unchecked element access by index. */
+ T &operator[](int i) const
+ {
+ return *(BaseTable::data + i);
+ }
+ /* Same value as length(). */
+ long size() const
+ {
+ return BaseTable::length();
+ }
+
+ /* Tag types used to construct or assign an Iter. IterFirst and IterLast
+ * hold a reference to a vector; IterNext and IterPrev hold a reference to
+ * an existing iterator. Iter has a constructor and an assignment operator
+ * for each of them. */
+ struct Iter;
+ struct IterFirst { IterFirst( const SVector &v ) : v(v) { } const SVector &v; };
+ struct IterLast { IterLast( const SVector &v ) : v(v) { } const SVector &v; };
+ struct IterNext { IterNext( const Iter &i ) : i(i) { } const Iter &i; };
+ struct IterPrev { IterPrev( const Iter &i ) : i(i) { } const Iter &i; };
+
+ /**
+ * \brief Shared Vector Iterator.
+ * \ingroup iterators
+ *
+ * The iterator is a raw pointer into the vector's element array plus two
+ * sentinel pointers. When built from a non-empty vector, ptrBeg is one
+ * position before the first element and ptrEnd is one position past the
+ * last. When built from an empty vector all three pointers are null. The
+ * iterator does not hold a reference on the vector's storage.
+ */
+ struct Iter
+ {
+ /* Default construct with all three pointers null. */
+ Iter() : ptr(0), ptrBeg(0), ptrEnd(0) { }
+
+ /* Construct from a vector (starts at the first element), from a
+ * first/last tag, or from the next/prev of another iterator. */
+ Iter( const SVector &v );
+ Iter( const IterFirst &vf );
+ Iter( const IterLast &vl );
+ inline Iter( const IterNext &vn );
+ inline Iter( const IterPrev &vp );
+
+ /* Assign. Same sources as the constructors above. */
+ Iter &operator=( const SVector &v );
+ Iter &operator=( const IterFirst &vf );
+ Iter &operator=( const IterLast &vl );
+ inline Iter &operator=( const IterNext &vf );
+ inline Iter &operator=( const IterPrev &vl );
+
+ /** \brief Less than end? True while ptr differs from ptrEnd. */
+ bool lte() const { return ptr != ptrEnd; }
+
+ /** \brief At end? True when ptr equals ptrEnd. */
+ bool end() const { return ptr == ptrEnd; }
+
+ /** \brief Greater than beginning? True while ptr differs from ptrBeg. */
+ bool gtb() const { return ptr != ptrBeg; }
+
+ /** \brief At beginning? True when ptr equals ptrBeg. */
+ bool beg() const { return ptr == ptrBeg; }
+
+ /** \brief At first element? */
+ bool first() const { return ptr == ptrBeg+1; }
+
+ /** \brief At last element? */
+ bool last() const { return ptr == ptrEnd-1; }
+
+ /* Return the position: the offset of ptr from the slot after ptrBeg. */
+ long pos() const { return ptr - ptrBeg - 1; }
+
+ /* Index relative to the current position. No bounds checking. */
+ T &operator[](int i) const { return ptr[i]; }
+
+ /** \brief Implicit cast to T*. */
+ operator T*() const { return ptr; }
+
+ /** \brief Dereference operator returns T&. */
+ T &operator *() const { return *ptr; }
+
+ /** \brief Arrow operator returns T*. */
+ T *operator->() const { return ptr; }
+
+ /** \brief Move to next item. Returns the new position. */
+ T *operator++() { return ++ptr; }
+
+ /** \brief Move to next item. Returns the position before the move. */
+ T *operator++(int) { return ptr++; }
+
+ /** \brief Move to next item. Returns the new position. */
+ T *increment() { return ++ptr; }
+
+ /** \brief Move to previous item. Returns the new position. */
+ T *operator--() { return --ptr; }
+
+ /** \brief Move to previous item. Returns the position before the move. */
+ T *operator--(int) { return ptr--; }
+
+ /** \brief Move to previous item. Returns the new position. */
+ T *decrement() { return --ptr; }
+
+ /** \brief Return the next item. Does not modify this. */
+ inline IterNext next() const { return IterNext(*this); }
+
+ /** \brief Return the previous item. Does not modify this. */
+ inline IterPrev prev() const { return IterPrev(*this); }
+
+ /** \brief The iterator is simply a pointer. */
+ T *ptr;
+
+ /* For testing endpoints. */
+ T *ptrBeg, *ptrEnd;
+ };
+
+ /** \brief Return an IterFirst tag for this vector. Constructing or
+ * assigning an Iter from it positions the Iter on the first element. */
+ IterFirst first() { return IterFirst( *this ); }
+
+ /** \brief Return an IterLast tag for this vector. Constructing or
+ * assigning an Iter from it positions the Iter on the last element. */
+ IterLast last() { return IterLast( *this ); }
+
+protected:
+ /* Open a gap of len uninitialized slots at pos and update the length. */
+ void makeRawSpaceFor(long pos, long len);
+
+ /* Storage preparation shared by the setAs, replace and insert families.
+ * replaceCommon and insertCommon return the position to write at. */
+ void setAsCommon(long len);
+ long replaceCommon(long pos, long len);
+ long insertCommon(long pos, long len);
+
+ /* Allocation management. upResize and downResize realloc the current
+ * buffer. The Dup variants malloc a new buffer and drop one reference on
+ * the old one. upResizeFromEmpty mallocs when no buffer exists. */
+ void upResize(long len);
+ void upResizeDup(long len);
+ void upResizeFromEmpty(long len);
+ void downResize(long len);
+ void downResizeDup(long len);
+};
+
+#if 0
+/* Create a vector with an initial number of elements and size. */
+template <class T, class Resize> SVector<T, Resize>::
+ SVector( long size, long allocLen )
+{
+ /* Allocate the space if we are given a positive allocLen. */
+ if ( allocLen > 0 ) {
+ /* Allocate the data needed. */
+ STabHead *head = (STabHead*) malloc( sizeof(STabHead) +
+ sizeof(T) * allocLen );
+ if ( head == 0 )
+ throw std::bad_alloc();
+
+ /* Set up the header and save the data pointer. */
+ head->refCount = 1;
+ head->allocLen = allocLen;
+ head->tabLen = 0;
+ BaseTable::data = (T*) (head + 1);
+ }
+
+ /* Grow to the size specified. If we did not have enough space
+ * allocated that is ok. Table will be grown to the right size. */
+ setAsNew( size );
+}
+#endif
+
+/**
+ * \brief Shallow-copy constructor.
+ *
+ * The new vector shares the storage of the other vector. No elements are
+ * copied; the reference count in the shared header is incremented when the
+ * other vector has storage allocated.
+ */
+template <class T, class Resize> SVector<T, Resize>::
+ SVector(const SVector<T, Resize> &v)
+{
+ /* Start out unallocated. This is also the result when v has no data. */
+ BaseTable::data = 0;
+
+ if ( v.data != 0 ) {
+ /* The header sits immediately before the element array. */
+ STabHead *shared = ((STabHead*) v.data) - 1;
+ shared->refCount += 1;
+ BaseTable::data = (T*) (shared + 1);
+ }
+}
+
+#if 0
+/**
+ * \brief Perform a shallow copy of the vector from only the header.
+ *
+ * Takes a reference to the contents specified by the header.
+ */
+template <class T, class Resize> SVector<T, Resize>::
+ SVector(STabHead *head)
+{
+ /* Take a reference to other, if the header is non-null. */
+ if ( head == 0 )
+ BaseTable::data = 0;
+ else {
+ head->refCount += 1;
+ BaseTable::data = (T*) (head + 1);
+ }
+}
+#endif
+
+
+/**
+ * \brief Shallow copy another vector into this vector.
+ *
+ * Takes a reference to the storage of the other vector and releases the
+ * reference this vector previously held. Assigning a vector to itself
+ * leaves it unchanged.
+ *
+ * \param v The vector whose storage is to be shared.
+ * \returns A reference to this.
+ */
+template <class T, class Resize> SVector<T, Resize> &
+ SVector<T, Resize>:: operator=( const SVector &v )
+{
+ /* Take the reference on the source before releasing our own storage. If v
+ * is this vector and we hold the only reference, releasing first would
+ * free the data and leave nothing to share. */
+ STabHead *srcHead = 0;
+ if ( v.data != 0 ) {
+ srcHead = ((STabHead*) v.data) - 1;
+ srcHead->refCount += 1;
+ }
+
+ /* Now drop the reference to the current contents. */
+ empty();
+
+ /* Point at the source data, if there is any. */
+ if ( srcHead == 0 )
+ BaseTable::data = 0;
+ else
+ BaseTable::data = (T*) (srcHead + 1);
+ return *this;
+}
+
+/**
+ * \brief Shallow copy another vector into this vector from only the header.
+ *
+ * Takes a reference to the storage described by the header and releases the
+ * reference this vector previously held. Passing this vector's own header
+ * leaves the vector unchanged.
+ *
+ * \param head Header of the storage to share, or null for no storage.
+ * \returns A reference to this.
+ */
+template <class T, class Resize> SVector<T, Resize> &
+ SVector<T, Resize>::operator=( STabHead *head )
+{
+ /* Take the new reference before releasing the old one. If head is our
+ * own header and we hold the only reference, releasing first would free
+ * the header and the increment below would touch freed memory. */
+ if ( head != 0 )
+ head->refCount += 1;
+
+ /* Now drop the reference to the current contents. */
+ empty();
+
+ /* Point at the data following the header, if the header is non-null. */
+ if ( head == 0 )
+ BaseTable::data = 0;
+ else
+ BaseTable::data = (T*) (head + 1);
+ return *this;
+}
+
+/* Build an iterator positioned on the first element of v. A vector with no
+ * data or with zero length yields an iterator with all pointers null. */
+template <class T, class Resize> SVector<T, Resize>::
+ Iter::Iter( const SVector &v )
+{
+ /* Only an allocated vector has a header to read the length from. */
+ long count = 0;
+ if ( v.data != 0 )
+ count = (((STabHead*)v.data) - 1)->tabLen;
+
+ if ( count != 0 ) {
+ ptr = v.data;
+ ptrBeg = v.data - 1;
+ ptrEnd = v.data + count;
+ }
+ else {
+ ptr = 0;
+ ptrBeg = 0;
+ ptrEnd = 0;
+ }
+}
+
+/* Build an iterator positioned on the first element of the tagged vector.
+ * A vector with no data or zero length yields all null pointers. */
+template <class T, class Resize> SVector<T, Resize>::
+ Iter::Iter( const IterFirst &vf )
+{
+ /* Only an allocated vector has a header to read the length from. */
+ long count = 0;
+ if ( vf.v.data != 0 )
+ count = (((STabHead*)vf.v.data) - 1)->tabLen;
+
+ if ( count != 0 ) {
+ ptr = vf.v.data;
+ ptrBeg = vf.v.data - 1;
+ ptrEnd = vf.v.data + count;
+ }
+ else {
+ ptr = 0;
+ ptrBeg = 0;
+ ptrEnd = 0;
+ }
+}
+
+/* Build an iterator positioned on the last element of the tagged vector.
+ * A vector with no data or zero length yields all null pointers. */
+template <class T, class Resize> SVector<T, Resize>::
+ Iter::Iter( const IterLast &vl )
+{
+ /* Only an allocated vector has a header to read the length from. */
+ long count = 0;
+ if ( vl.v.data != 0 )
+ count = (((STabHead*)vl.v.data) - 1)->tabLen;
+
+ if ( count != 0 ) {
+ ptr = vl.v.data + count - 1;
+ ptrBeg = vl.v.data - 1;
+ ptrEnd = vl.v.data + count;
+ }
+ else {
+ ptr = 0;
+ ptrBeg = 0;
+ ptrEnd = 0;
+ }
+}
+
+/* Build an iterator one position after the tagged iterator. The endpoint
+ * pointers are taken over unchanged. */
+template <class T, class Resize> SVector<T, Resize>::
+ Iter::Iter( const IterNext &vn )
+{
+ ptr = vn.i.ptr + 1;
+ ptrBeg = vn.i.ptrBeg;
+ ptrEnd = vn.i.ptrEnd;
+}
+
+/* Build an iterator one position before the tagged iterator. The endpoint
+ * pointers are taken over unchanged. */
+template <class T, class Resize> SVector<T, Resize>::
+ Iter::Iter( const IterPrev &vp )
+{
+ ptr = vp.i.ptr - 1;
+ ptrBeg = vp.i.ptrBeg;
+ ptrEnd = vp.i.ptrEnd;
+}
+
+/* Reposition the iterator on the first element of v. A vector with no data
+ * or zero length sets all pointers to null. */
+template <class T, class Resize> typename SVector<T, Resize>::Iter &
+ SVector<T, Resize>::Iter::operator=( const SVector &v )
+{
+ /* Only an allocated vector has a header to read the length from. */
+ long count = 0;
+ if ( v.data != 0 )
+ count = (((STabHead*)v.data) - 1)->tabLen;
+
+ if ( count != 0 ) {
+ ptr = v.data;
+ ptrBeg = v.data - 1;
+ ptrEnd = v.data + count;
+ }
+ else {
+ ptr = 0;
+ ptrBeg = 0;
+ ptrEnd = 0;
+ }
+ return *this;
+}
+
+/* Reposition the iterator on the first element of the tagged vector. A
+ * vector with no data or zero length sets all pointers to null. */
+template <class T, class Resize> typename SVector<T, Resize>::Iter &
+ SVector<T, Resize>::Iter::operator=( const IterFirst &vf )
+{
+ /* Only an allocated vector has a header to read the length from. */
+ long count = 0;
+ if ( vf.v.data != 0 )
+ count = (((STabHead*)vf.v.data) - 1)->tabLen;
+
+ if ( count != 0 ) {
+ ptr = vf.v.data;
+ ptrBeg = vf.v.data - 1;
+ ptrEnd = vf.v.data + count;
+ }
+ else {
+ ptr = 0;
+ ptrBeg = 0;
+ ptrEnd = 0;
+ }
+ return *this;
+}
+
+/* Reposition the iterator on the last element of the tagged vector. A
+ * vector with no data or zero length sets all pointers to null. */
+template <class T, class Resize> typename SVector<T, Resize>::Iter &
+ SVector<T, Resize>::Iter::operator=( const IterLast &vl )
+{
+ /* Only an allocated vector has a header to read the length from. */
+ long count = 0;
+ if ( vl.v.data != 0 )
+ count = (((STabHead*)vl.v.data) - 1)->tabLen;
+
+ if ( count != 0 ) {
+ ptr = vl.v.data + count - 1;
+ ptrBeg = vl.v.data - 1;
+ ptrEnd = vl.v.data + count;
+ }
+ else {
+ ptr = 0;
+ ptrBeg = 0;
+ ptrEnd = 0;
+ }
+ return *this;
+}
+
+/* Reposition the iterator one place after the tagged iterator, taking over
+ * its endpoint pointers. */
+template <class T, class Resize> typename SVector<T, Resize>::Iter &
+ SVector<T, Resize>::Iter::operator=( const IterNext &vn )
+{
+ const Iter &from = vn.i;
+ ptr = from.ptr + 1;
+ ptrBeg = from.ptrBeg;
+ ptrEnd = from.ptrEnd;
+ return *this;
+}
+
+/* Reposition the iterator one place before the tagged iterator, taking over
+ * its endpoint pointers. */
+template <class T, class Resize> typename SVector<T, Resize>::Iter &
+ SVector<T, Resize>::Iter::operator=( const IterPrev &vp )
+{
+ const Iter &from = vp.i;
+ ptr = from.ptr - 1;
+ ptrBeg = from.ptrBeg;
+ ptrEnd = from.ptrEnd;
+ return *this;
+}
+
+/* Up resize the data for len elements using Resize::upResize to tell us the
+ * new length. Reads and writes allocLen. Does not read or write length.
+ * Assumes that there is some data allocated already. Throws std::bad_alloc
+ * if the buffer cannot be grown, in which case the existing buffer and its
+ * header are left untouched. */
+template <class T, class Resize> void SVector<T, Resize>::
+ upResize(long len)
+{
+ /* Get the current header. */
+ STabHead *head = ((STabHead*)BaseTable::data) - 1;
+
+ /* Ask the resizer what the new length will be. */
+ long newLen = Resize::upResize(head->allocLen, len);
+
+ /* Did the data grow? */
+ if ( newLen > head->allocLen ) {
+ /* Table exists already, resize it up. Keep the result in a separate
+ * pointer: when realloc fails the old block is still valid and its
+ * allocLen must keep describing it. */
+ STabHead *grown = (STabHead*) realloc( head, sizeof(STabHead) +
+ sizeof(T) * newLen );
+ if ( grown == 0 )
+ throw std::bad_alloc();
+
+ /* Record the new capacity only once the block really has it. */
+ grown->allocLen = newLen;
+
+ /* Save the data pointer. */
+ BaseTable::data = (T*) (grown + 1);
+ }
+}
+
+/* Allocates a new buffer for an up resize that requires a duplication of the
+ * data. Uses Resize::upResize to get the allocation length. Reads the old
+ * allocLen and writes the new header's allocLen. This upResize does write
+ * the new length. Does not copy any elements; the caller copies them from
+ * the old buffer. Assumes that there is some data allocated already. Throws
+ * std::bad_alloc if the new buffer cannot be allocated, in which case the
+ * vector still references the old buffer and its refcount is unchanged. */
+template <class T, class Resize> void SVector<T, Resize>::
+ upResizeDup(long len)
+{
+ /* Get the current header. */
+ STabHead *head = ((STabHead*)BaseTable::data) - 1;
+
+ /* Ask the resizer what the new length will be. */
+ long newLen = Resize::upResize(head->allocLen, len);
+
+ /* Allocate the new buffer before detaching. If the allocation fails the
+ * old buffer must keep the reference this vector still holds on it. */
+ STabHead *fresh = (STabHead*) malloc( sizeof(STabHead) + sizeof(T) * newLen );
+ if ( fresh == 0 )
+ throw std::bad_alloc();
+
+ /* Dereferencing the existing data, decrement the refcount. */
+ head->refCount -= 1;
+
+ fresh->refCount = 1;
+ fresh->allocLen = newLen;
+ fresh->tabLen = len;
+
+ /* Save the data pointer. */
+ BaseTable::data = (T*) (fresh + 1);
+}
+
+/* Allocate the first buffer for a vector that currently has none. The
+ * allocation length comes from Resize::upResize. Unlike upResize, this
+ * routine also writes the table length. Nothing is allocated when len is
+ * not positive. Throws std::bad_alloc when malloc fails. */
+template <class T, class Resize> void SVector<T, Resize>::
+ upResizeFromEmpty(long len)
+{
+ /* No elements requested means no table is needed. */
+ if ( len <= 0 )
+ return;
+
+ /* Ask the resizer how much to allocate when growing from nothing. */
+ const long capacity = Resize::upResize(0, len);
+
+ STabHead *hdr = (STabHead*) malloc( sizeof(STabHead) +
+ sizeof(T) * capacity );
+ if ( hdr == 0 )
+ throw std::bad_alloc();
+
+ /* Fill in the header. We are the only owner of the new buffer. */
+ hdr->refCount = 1;
+ hdr->allocLen = capacity;
+ hdr->tabLen = len;
+
+ /* The elements start right after the header. */
+ BaseTable::data = (T*) (hdr + 1);
+}
+
+/* Down resize the data for len elements using Resize::downResize to determine
+ * the new length. Reads and writes allocLen. Does not read or write length.
+ * When the resizer asks for zero, the buffer is freed and the data pointer
+ * is cleared. Throws std::bad_alloc if realloc fails, in which case the
+ * existing buffer and its header are left untouched. */
+template <class T, class Resize> void SVector<T, Resize>::
+ downResize(long len)
+{
+ /* If there is already no length, then there is nothing we can do. */
+ if ( BaseTable::data != 0 ) {
+ /* Get the current header. */
+ STabHead *head = ((STabHead*)BaseTable::data) - 1;
+
+ /* Ask the resizer what the new length will be. */
+ long newLen = Resize::downResize( head->allocLen, len );
+
+ /* Did the data shrink? */
+ if ( newLen < head->allocLen ) {
+ if ( newLen == 0 ) {
+ /* Simply free the data. */
+ free( head );
+ BaseTable::data = 0;
+ }
+ else {
+ /* Not shrinking to size zero, realloc it to the smaller size.
+ * Keep the result in a separate pointer so that a failed
+ * realloc leaves the old block and its allocLen intact. */
+ STabHead *shrunk = (STabHead*) realloc( head, sizeof(STabHead) +
+ sizeof(T) * newLen );
+ if ( shrunk == 0 )
+ throw std::bad_alloc();
+
+ /* Save the new allocated length. */
+ shrunk->allocLen = newLen;
+
+ /* Save the new data ptr. */
+ BaseTable::data = (T*) (shrunk + 1);
+ }
+ }
+ }
+}
+
+/* Allocate a new buffer for a down resize and duplication of the array. The
+ * new array will be len long and allocation size will be determined using
+ * Resize::downResize with the old array's allocLen. Does not actually copy
+ * any data. Reads the old allocLen and writes the new header's allocLen and
+ * length. Does nothing when no data is allocated. Throws std::bad_alloc if
+ * the new buffer cannot be allocated, in which case the vector still
+ * references the old buffer and its refcount is unchanged. */
+template <class T, class Resize> void SVector<T, Resize>::
+ downResizeDup(long len)
+{
+ /* If there is already no length, then there is nothing we can do. */
+ if ( BaseTable::data != 0 ) {
+ /* Get the current header. */
+ STabHead *head = ((STabHead*)BaseTable::data) - 1;
+
+ /* Ask the resizer what the new length will be. */
+ long newLen = Resize::downResize( head->allocLen, len );
+
+ /* Allocate the new buffer before detaching. If the allocation fails
+ * the old buffer must keep the reference this vector still holds. */
+ STabHead *fresh = (STabHead*) malloc( sizeof(STabHead) + sizeof(T) * newLen );
+ if ( fresh == 0 )
+ throw std::bad_alloc();
+
+ /* Detaching from the existing head, decrement the refcount. */
+ head->refCount -= 1;
+
+ /* Set up the new header. */
+ fresh->refCount = 1;
+ fresh->allocLen = newLen;
+ fresh->tabLen = len;
+
+ /* Save the data pointer. */
+ BaseTable::data = (T*) (fresh + 1);
+ }
+}
+
+/**
+ * \brief Release this vector's hold on its storage.
+ *
+ * The vector is reset to zero elements. If this vector held the last
+ * reference to the storage, destructors are called on all elements and the
+ * allocation is freed. Otherwise the storage is left for the other owners.
+ */
+template <class T, class Resize> void SVector<T, Resize>::
+ empty()
+{
+ /* Nothing allocated, nothing to release. */
+ if ( BaseTable::data == 0 )
+ return;
+
+ /* The header sits immediately before the element array. */
+ STabHead *hdr = ((STabHead*) BaseTable::data) - 1;
+ hdr->refCount -= 1;
+
+ if ( hdr->refCount == 0 ) {
+ /* We were the last owner: destruct every element, then free. */
+ T *elem = BaseTable::data;
+ for ( long n = 0; n < hdr->tabLen; n++, elem++ )
+ elem->~T();
+ free( hdr );
+ }
+
+ /* Either way this vector no longer points at the storage. */
+ BaseTable::data = 0;
+}
+
+/* Prepare for setting the contents of the vector to some array len long.
+ * Handles reusing the existing space, detaching from a common space or
+ * growing from zero length automatically. On return the storage (if any)
+ * has its length set to len but holds no constructed elements; the caller
+ * is expected to construct them. */
+template <class T, class Resize> void SVector<T, Resize>::
+ setAsCommon(long len)
+{
+ if ( BaseTable::data != 0 ) {
+ /* Get the header. */
+ STabHead *head = ((STabHead*)BaseTable::data) - 1;
+
+ /* If the refCount is one, then we can reuse the space. Otherwise we
+ * must detach from the referenced data and create new space. */
+ if ( head->refCount == 1 ) {
+ /* Call All destructors. */
+ T *pos = BaseTable::data;
+ for ( long i = 0; i < head->tabLen; pos++, i++ )
+ pos->~T();
+
+ /* Adjust the allocated length. */
+ if ( len < head->tabLen )
+ downResize( len );
+ else if ( len > head->tabLen )
+ upResize( len );
+
+ /* downResize may have freed the buffer and cleared the pointer. */
+ if ( BaseTable::data != 0 ) {
+ /* Get the header again and set the length. */
+ head = ((STabHead*)BaseTable::data) - 1;
+ head->tabLen = len;
+ }
+ }
+ else {
+ /* Just detach from the data. The other owners keep the elements,
+ * so no destructors are run here. */
+ head->refCount -= 1;
+ BaseTable::data = 0;
+
+ /* Make enough space. This will set the length. */
+ upResizeFromEmpty( len );
+ }
+ }
+ else {
+ /* The table is currently empty. Make enough space. This will set the
+ * length. */
+ upResizeFromEmpty( len );
+ }
+}
+
+/**
+ * \brief Make the vector hold exactly the len elements found at val.
+ *
+ * The vector becomes len elements in length. Existing elements are
+ * destructed if this vector is the sole owner of its storage; otherwise the
+ * vector detaches from the shared storage. Each new element is copy
+ * constructed from the corresponding element of val.
+ */
+template <class T, class Resize> void SVector<T, Resize>::
+ setAs(const T *val, long len)
+{
+ /* Get raw storage for len elements. */
+ setAsCommon( len );
+
+ /* Copy construct each source element into its slot. */
+ for ( long idx = 0; idx < len; idx++ )
+ new(BaseTable::data + idx) T(val[idx]);
+}
+
+
+/**
+ * \brief Make the vector hold exactly len copies of item.
+ *
+ * The vector becomes len elements in length. Existing elements are
+ * destructed if this vector is the sole owner of its storage; otherwise the
+ * vector detaches from the shared storage. Every new element is copy
+ * constructed from item.
+ */
+template <class T, class Resize> void SVector<T, Resize>::
+ setAsDup(const T &item, long len)
+{
+ /* Get raw storage for len elements. */
+ setAsCommon( len );
+
+ /* Copy construct item into every slot. */
+ for ( long idx = 0; idx < len; idx++ )
+ new(BaseTable::data + idx) T(item);
+}
+
+/**
+ * \brief Make the vector hold exactly len default-constructed items.
+ *
+ * The vector becomes len elements in length. Existing elements are
+ * destructed if this vector is the sole owner of its storage; otherwise the
+ * vector detaches from the shared storage. Every new element is created
+ * with the default constructor.
+ */
+template <class T, class Resize> void SVector<T, Resize>::
+ setAsNew(long len)
+{
+ /* Get raw storage for len elements. */
+ setAsCommon( len );
+
+ /* Default construct every slot. */
+ for ( long idx = 0; idx < len; idx++ )
+ new(BaseTable::data + idx) T();
+}
+
+/* Make space in vector for a replacement at pos of len items. Handles reusing
+ * existing space, detaching or growing from zero space. On return the len
+ * slots starting at the returned position hold no constructed elements and
+ * are ready for the caller to construct into. Returns pos, converted to an
+ * absolute position if it was given as negative and data was allocated. */
+template <class T, class Resize> long SVector<T, Resize>::
+ replaceCommon(long pos, long len)
+{
+ if ( BaseTable::data != 0 ) {
+ /* Get the header. */
+ STabHead *head = ((STabHead*)BaseTable::data) - 1;
+
+ /* If we are given a negative position to replace at then treat it as
+ * a position relative to the length. This doesn't have any meaning
+ * unless the length is at least one. */
+ if ( pos < 0 )
+ pos = head->tabLen + pos;
+
+ /* The end is the one past the last item that we want to write to. */
+ long i, endPos = pos + len;
+
+ if ( head->refCount == 1 ) {
+ /* We can reuse the space. Make sure we have enough space. */
+ if ( endPos > head->tabLen ) {
+ upResize( endPos );
+
+ /* Get the header again, whose addr may have changed after
+ * resizing. */
+ head = ((STabHead*)BaseTable::data) - 1;
+
+ /* Delete any objects we need to delete. Only the slots from
+ * pos up to the old length hold constructed elements. */
+ T *item = BaseTable::data + pos;
+ for ( i = pos; i < head->tabLen; i++, item++ )
+ item->~T();
+
+ /* We are extending the vector, set the new data length. */
+ head->tabLen = endPos;
+ }
+ else {
+ /* Delete any objects we need to delete. The whole replace
+ * range lies within the existing elements. */
+ T *item = BaseTable::data + pos;
+ for ( i = pos; i < endPos; i++, item++ )
+ item->~T();
+ }
+ }
+ else {
+ /* Use endPos to calc the end of the vector. */
+ long newLen = endPos;
+ if ( newLen < head->tabLen )
+ newLen = head->tabLen;
+
+ /* Duplicate and grow up to endPos. This will set the length. */
+ upResizeDup( newLen );
+
+ /* Copy from src up to pos. Note that head still refers to the old
+ * shared header; BaseTable::data now refers to the new buffer. */
+ const T *src = (T*) (head + 1);
+ T *dst = BaseTable::data;
+ for ( i = 0; i < pos; i++, dst++, src++)
+ new(dst) T(*src);
+
+ /* Copy any items after the replace range. The bound is the old
+ * length, read from the old header. */
+ for ( i += len, src += len, dst += len;
+ i < head->tabLen; i++, dst++, src++ )
+ new(dst) T(*src);
+ }
+ }
+ else {
+ /* There is no data initially, must grow from zero. This will set the
+ * new length. Note that pos is not consulted on this path. */
+ upResizeFromEmpty( len );
+ }
+
+ return pos;
+}
+
+
+/**
+ * \brief Overwrite len elements starting at position pos.
+ *
+ * Existing elements in the replaced range are destructed before their space
+ * is reused. The new elements are copy constructed from val. The range may
+ * run past the current end of the vector, in which case the vector grows.
+ * If pos is greater than the length of the vector then undefined behaviour
+ * results. A negative pos is treated as an offset relative to the length of
+ * the vector.
+ */
+template <class T, class Resize> void SVector<T, Resize>::
+ replace(long pos, const T *val, long len)
+{
+ /* Prepare the range and pick up the absolute write position. */
+ const long start = replaceCommon( pos, len );
+
+ /* Copy construct each source element into the prepared range. */
+ T *slot = BaseTable::data + start;
+ for ( long idx = 0; idx < len; idx++ )
+ new(slot + idx) T(val[idx]);
+}
+
+/**
+ * \brief Overwrite len elements starting at position pos with copies of val.
+ *
+ * Existing elements in the replaced range are destructed before their space
+ * is reused. Every new element is copy constructed from val. The range may
+ * run past the current end of the vector, in which case the vector grows.
+ * If pos is greater than the length of the vector then undefined behaviour
+ * results. A negative pos is treated as an offset relative to the length of
+ * the vector.
+ */
+template <class T, class Resize> void SVector<T, Resize>::
+ replaceDup(long pos, const T &val, long len)
+{
+ /* Prepare the range and pick up the absolute write position. */
+ const long start = replaceCommon( pos, len );
+
+ /* Copy construct val into every slot of the prepared range. */
+ T *slot = BaseTable::data + start;
+ for ( long idx = 0; idx < len; idx++ )
+ new(slot + idx) T(val);
+}
+
+/**
+ * \brief Overwrite len elements starting at position pos with new elements.
+ *
+ * Existing elements in the replaced range are destructed before their space
+ * is reused. Every new element is created with the default constructor. The
+ * range may run past the current end of the vector, in which case the
+ * vector grows. If pos is greater than the length of the vector then
+ * undefined behaviour results. A negative pos is treated as an offset
+ * relative to the length of the vector.
+ */
+template <class T, class Resize> void SVector<T, Resize>::
+ replaceNew(long pos, long len)
+{
+ /* Prepare the range and pick up the absolute write position. */
+ const long start = replaceCommon( pos, len );
+
+ /* Default construct every slot of the prepared range. */
+ T *slot = BaseTable::data + start;
+ for ( long idx = 0; idx < len; idx++ )
+ new(slot + idx) T();
+}
+
+/**
+ * \brief Remove len elements at position pos.
+ *
+ * Destructor is called on all elements removed. Elements to the right of pos
+ * are shifted len spaces to the left to take up the free space. If pos is
+ * greater than or equal to the length of the vector then undefined behavior
+ * results. If pos is negative then it is treated as an offset relative to the
+ * length of the vector.
+ *
+ * If the storage is shared with other vectors, this vector detaches onto a
+ * new buffer holding copies of the remaining elements, and no destructors
+ * are run on the shared elements. Nothing happens if no data is allocated.
+ */
+template <class T, class Resize> void SVector<T, Resize>::
+ remove(long pos, long len)
+{
+ /* If there is no data, we can't delete anything anyways. */
+ if ( BaseTable::data != 0 ) {
+ /* Get the header. */
+ STabHead *head = ((STabHead*)BaseTable::data) - 1;
+
+ /* If we are given a negative position to remove at then
+ * treat it as a position relative to the length. */
+ if ( pos < 0 )
+ pos = head->tabLen + pos;
+
+ /* The first position after the last item deleted. */
+ long endPos = pos + len;
+
+ /* The New data length. */
+ long i, newLen = head->tabLen - len;
+
+ if ( head->refCount == 1 ) {
+ /* We are the only ones using the data. We can reuse
+ * the existing space. */
+
+ /* The place in the data we are deleting at. */
+ T *dst = BaseTable::data + pos;
+
+ /* Call Destructors. */
+ T *item = BaseTable::data + pos;
+ for ( i = 0; i < len; i += 1, item += 1 )
+ item->~T();
+
+ /* Shift data over if necessary. The tail is moved as raw bytes
+ * with memmove; no constructors or destructors are run on it. */
+ long lenToSlideOver = head->tabLen - endPos;
+ if ( len > 0 && lenToSlideOver > 0 )
+ memmove(BaseTable::data + pos, dst + len, sizeof(T)*lenToSlideOver);
+
+ /* Shrink the data if necessary. */
+ downResize( newLen );
+
+ /* downResize may have freed the buffer and cleared the pointer. */
+ if ( BaseTable::data != 0 ) {
+ /* Get the header again (because of the resize) and set the
+ * new data length. */
+ head = ((STabHead*)BaseTable::data) - 1;
+ head->tabLen = newLen;
+ }
+ }
+ else {
+ /* Must detach from the common data. Just copy the non-deleted
+ * items from the common data. */
+
+ /* Duplicate and grow down to newLen. This will set the length. */
+ downResizeDup( newLen );
+
+ /* Copy over just the non-deleted parts. Note that head still
+ * refers to the old shared header; BaseTable::data now refers to
+ * the new buffer. */
+ const T *src = (T*) (head + 1);
+ T *dst = BaseTable::data;
+ for ( i = 0; i < pos; i++, dst++, src++ )
+ new(dst) T(*src);
+
+ /* ... and the second half. Only src skips the removed range; dst
+ * continues from where the first half ended. */
+ for ( i += len, src += len; i < head->tabLen; i++, src++, dst++ )
+ new(dst) T(*src);
+ }
+ }
+}
+
+/* Shift over existing data. Handles reusing existing space, detaching or
+ * growing from zero space. On return the len slots starting at the returned
+ * position hold no constructed elements and are ready for the caller to
+ * construct into. Returns pos, converted to an absolute position if it was
+ * given as negative and data was allocated. */
+template <class T, class Resize> long SVector<T, Resize>::
+ insertCommon(long pos, long len)
+{
+ if ( BaseTable::data != 0 ) {
+ /* Get the header. */
+ STabHead *head = ((STabHead*)BaseTable::data) - 1;
+
+ /* If we are given a negative position to insert at then treat it as a
+ * position relative to the length. This only has meaning if there is
+ * existing data. */
+ if ( pos < 0 )
+ pos = head->tabLen + pos;
+
+ /* Calculate the new length. */
+ long i, newLen = head->tabLen + len;
+
+ if ( head->refCount == 1 ) {
+ /* Up resize, we are growing. */
+ upResize( newLen );
+
+ /* Get the header again, (the addr may have changed after
+ * resizing). */
+ head = ((STabHead*)BaseTable::data) - 1;
+
+ /* Shift over data at insert spot if needed. The tail is moved as
+ * raw bytes with memmove; no constructors or destructors are run
+ * on it. */
+ if ( len > 0 && pos < head->tabLen ) {
+ memmove( BaseTable::data + pos + len, BaseTable::data + pos,
+ sizeof(T)*(head->tabLen - pos) );
+ }
+
+ /* Grow the length by the len inserted. */
+ head->tabLen += len;
+ }
+ else {
+ /* Need to detach from the existing array. Copy over the other
+ * parts. This will set the length. */
+ upResizeDup( newLen );
+
+ /* Copy over the parts around the insert. Note that head still
+ * refers to the old shared header; BaseTable::data now refers to
+ * the new buffer. */
+ const T *src = (T*) (head + 1);
+ T *dst = BaseTable::data;
+ for ( i = 0; i < pos; i++, dst++, src++ )
+ new(dst) T(*src);
+
+ /* ... and the second half. Only dst skips the gap left for the
+ * inserted items. */
+ for ( dst += len; i < head->tabLen; i++, src++, dst++ )
+ new(dst) T(*src);
+ }
+ }
+ else {
+ /* There is no existing data. Start from zero. This will set the
+ * length. Note that pos is not consulted on this path. */
+ upResizeFromEmpty( len );
+ }
+
+ return pos;
+}
+
+
+/**
+ * \brief Insert len elements from val at position pos.
+ *
+ * Elements from pos onward move len places to the right. The inserted
+ * elements are copy constructed from val. If pos is greater than the length
+ * of the vector then undefined behaviour results. A negative pos is treated
+ * as an offset relative to the length of the vector.
+ */
+template <class T, class Resize> void SVector<T, Resize>::
+ insert(long pos, const T *val, long len)
+{
+ /* Open the gap and pick up the absolute insert position. */
+ const long start = insertCommon( pos, len );
+
+ /* Copy construct each source element into the gap. */
+ T *slot = BaseTable::data + start;
+ for ( long idx = 0; idx < len; idx++ )
+ new(slot + idx) T(val[idx]);
+}
+
+/**
+ * \brief Insert len copies of item at position pos.
+ *
+ * Elements from pos onward move len places to the right. Every inserted
+ * element is copy constructed from item. If pos is greater than the length
+ * of the vector then undefined behaviour results. A negative pos is treated
+ * as an offset relative to the length of the vector.
+ */
+template <class T, class Resize> void SVector<T, Resize>::
+ insertDup(long pos, const T &item, long len)
+{
+ /* Open the gap and pick up the absolute insert position. */
+ const long start = insertCommon( pos, len );
+
+ /* Copy construct item into every slot of the gap. */
+ T *slot = BaseTable::data + start;
+ for ( long idx = 0; idx < len; idx++ )
+ new(slot + idx) T(item);
+}
+
+
+/**
+ * \brief Insert len default-constructed elements at position pos.
+ *
+ * Elements from pos onward move len places to the right. Every inserted
+ * element is created with the default constructor. If pos is off the end of
+ * the vector then undefined behaviour results. A negative pos is treated as
+ * an offset relative to the length of the vector.
+ */
+template <class T, class Resize> void SVector<T, Resize>::
+ insertNew(long pos, long len)
+{
+ /* Open the gap and pick up the absolute insert position. */
+ const long start = insertCommon( pos, len );
+
+ /* Default construct every slot of the gap. */
+ T *slot = BaseTable::data + start;
+ for ( long idx = 0; idx < len; idx++ )
+ new(slot + idx) T();
+}
+
+/* Makes space for len items, Does not init the items in any way. If pos is
+ * greater than the length of the vector then undefined behaviour results.
+ * Updates the length of the vector. Unlike insertCommon, a negative pos is
+ * not converted to an offset from the end, and nothing is returned. */
+template <class T, class Resize> void SVector<T, Resize>::
+ makeRawSpaceFor(long pos, long len)
+{
+ if ( BaseTable::data != 0 ) {
+ /* Get the header. */
+ STabHead *head = ((STabHead*)BaseTable::data) - 1;
+
+ /* Calculate the new length. */
+ long i, newLen = head->tabLen + len;
+
+ if ( head->refCount == 1 ) {
+ /* Up resize, we are growing. */
+ upResize( newLen );
+
+ /* Get the header again, (the addr may have changed after
+ * resizing). */
+ head = ((STabHead*)BaseTable::data) - 1;
+
+ /* Shift over data at insert spot if needed. The tail is moved as
+ * raw bytes with memmove; no constructors or destructors are run
+ * on it. */
+ if ( len > 0 && pos < head->tabLen ) {
+ memmove( BaseTable::data + pos + len, BaseTable::data + pos,
+ sizeof(T)*(head->tabLen - pos) );
+ }
+
+ /* Grow the length by the len inserted. */
+ head->tabLen += len;
+ }
+ else {
+ /* Need to detach from the existing array. Copy over the other
+ * parts. This will set the length. */
+ upResizeDup( newLen );
+
+ /* Copy over the parts around the insert. Note that head still
+ * refers to the old shared header; BaseTable::data now refers to
+ * the new buffer. */
+ const T *src = (T*) (head + 1);
+ T *dst = BaseTable::data;
+ for ( i = 0; i < pos; i++, dst++, src++ )
+ new(dst) T(*src);
+
+ /* ... and the second half. Only dst skips the gap left for the
+ * new items. */
+ for ( dst += len; i < head->tabLen; i++, src++, dst++ )
+ new(dst) T(*src);
+ }
+ }
+ else {
+ /* There is no existing data. Start from zero. This will set the
+ * length. Note that pos is not consulted on this path. */
+ upResizeFromEmpty( len );
+ }
+}
+
+
+#ifdef AAPL_NAMESPACE
+}
+#endif
+
+
+#endif /* _AAPL_SVECTOR_H */
diff --git a/aapl/table.h b/aapl/table.h
new file mode 100644
index 0000000..c1f2b7b
--- /dev/null
+++ b/aapl/table.h
@@ -0,0 +1,252 @@
+/*
+ * Copyright 2001, 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_TABLE_H
+#define _AAPL_TABLE_H
+
+#ifdef AAPL_NAMESPACE
+namespace Aapl {
+#endif
+
+/**
+ * \addtogroup vector
+ * @{
+ */
+
+/** \class Table
+ * \brief Base class for dynamic arrays.
+ *
+ * Table is used as the common data storage class for vectors. It does not
+ * provide any methods to operate on the data and as such it is not intended
+ * to be used directly. It exists so that algorithms that operate on dynamic
+ * arrays can be written without knowing about the various vector classes that
+ * may exist.
+ */
+
+/*@}*/
+
+/* Table class. */
+template <class T> class Table
+{
+public:
+ /* Default constructor: starts with no data, a length of zero and nothing
+ * allocated. */
+ inline Table();
+
+ /**
+ * \brief Get the length of the vector.
+ *
+ * \returns the length of the vector.
+ */
+ long length() const
+ { return tabLen; }
+
+ /**
+ * \brief Table data.
+ *
+ * The pointer to the elements in the vector. Modifying the vector may
+ * cause this pointer to change.
+ */
+ T *data;
+
+ /**
+ * \brief Table length.
+ *
+ * The number of items of type T in the table.
+ */
+ long tabLen;
+
+ /**
+ * \brief Allocated length.
+ *
+ * The number of items for which there is room in the current allocation.
+ */
+ long allocLen;
+};
+
+/**
+ * \brief Default constructor
+ *
+ * Initialize table data to empty.
+ */
+template <class T> inline Table<T>::Table()
+	: data(0), tabLen(0), allocLen(0)
+{
+	/* An empty table owns no memory, so there is nothing else to set up. */
+}
+
+/* Default shared table header class. */
+struct STabHead
+{
+ /* A header of this type sits directly in front of the element array of a
+ * shared table; STable reaches it with ((STabHead*)data) - 1. */
+
+ /**
+ * \brief Table length.
+ *
+ * The number of items of type T in the table.
+ */
+ long tabLen;
+
+ /**
+ * \brief Allocated length.
+ *
+ * The number of items for which there is room in the current allocation.
+ */
+ long allocLen;
+
+ /**
+ * \brief Ref Count.
+ *
+ * The number of shared vectors referencing this data.
+ */
+ long refCount;
+};
+
+/**
+ * \addtogroup vector
+ * @{
+ */
+
+/** \class STable
+ * \brief Base class for implicitly shared dynamic arrays.
+ *
+ * STable is used as the common data storage class for shared vectors. It does
+ * not provide any methods to operate on the data and as such it is not
+ * intended to be used directly. It exists so that algorithms that operate
+ * on dynamic arrays can be written without knowing about the various shared
+ * vector classes that may exist.
+ */
+
+/*@}*/
+
+/* STable class. */
+template <class T> class STable
+{
+public:
+	/* Default constructor: the table starts out with no data. */
+	inline STable();
+
+	/**
+	 * \brief Get the length of the shared vector.
+	 *
+	 * \returns the length recorded in the header, or zero when there is no
+	 * data.
+	 */
+	long length() const
+	{
+		if ( data == 0 )
+			return 0;
+		return (((STabHead*)data) - 1)->tabLen;
+	}
+
+	/**
+	 * \brief Get header of the shared vector.
+	 *
+	 * \returns the header of the shared vector, or null when there is no
+	 * data.
+	 */
+	STabHead *header() const
+	{
+		if ( data == 0 )
+			return 0;
+		return ((STabHead*)data) - 1;
+	}
+
+	/**
+	 * \brief Table data.
+	 *
+	 * The pointer to the elements in the vector. The shared table header is
+	 * located immediately before the first element. Modifying the vector may
+	 * cause this pointer to change.
+	 */
+	T *data;
+};
+
+/**
+ * \brief Default constructor
+ *
+ * Initialize shared table data to empty.
+ */
+template <class T> inline STable<T>::STable()
+	: data(0)
+{
+	/* No header and no elements exist until space is first allocated. */
+}
+
+/* If needed is greater than existing, give twice needed. The arguments and
+ * the whole expansion are parenthesized so that any expression may be passed
+ * in and the result may be used inside a larger expression. */
+#define EXPN_UP( existing, needed ) \
+	( (needed) > (existing) ? ((needed)<<1) : (existing) )
+
+/* If needed is less than 1 quarter existing, give twice needed. Parenthesized
+ * for the same reason as EXPN_UP. */
+#define EXPN_DOWN( existing, needed ) \
+	( (needed) < ((existing)>>2) ? ((needed)<<1) : (existing) )
+
+/**
+ * \addtogroup vector
+ * @{
+ */
+
+/** \class ResizeExpn
+ * \brief Exponential table resizer.
+ *
+ * ResizeExpn is the default table resizer. When an up resize is needed, twice
+ * the needed space is allocated. When a down resize is needed (less than one
+ * quarter of the allocation is in use), the allocation is reduced to twice
+ * the needed space. The result is that when growing the vector in a linear
+ * fashion, the number of resizes of the allocated space behaves
+ * logarithmically.
+ *
+ * If only up resizes are done, there will never be more than 2 times the
+ * needed space allocated. If down resizes are done as well, there will never
+ * be more than 4 times the needed space allocated. ResizeExpn uses this 50%
+ * usage policy on up resizing and 25% usage policy on down resizing to
+ * improve performance when repeatedly inserting and removing a small number
+ * of elements relative to the size of the array. This scheme guarantees that
+ * repetitive inserting and removing of a small number of elements will never
+ * result in repetitive reallocation.
+ *
+ * The sizes passed to the resizer from the vectors are in units of T.
+ */
+
+/*@}*/
+
+/* Exponential resizer. */
+class ResizeExpn
+{
+protected:
+	/**
+	 * \brief Determine the new table size when up resizing.
+	 *
+	 * If the existing size is insufficient for the space needed then allocate
+	 * twice the space needed. Otherwise use the existing size.
+	 *
+	 * \param existing The currently allocated size, in units of T.
+	 * \param needed The size required, in units of T.
+	 * \returns The new table size.
+	 */
+	static inline long upResize( long existing, long needed )
+		{ return EXPN_UP( existing, needed ); }
+
+	/**
+	 * \brief Determine the new table size when down resizing.
+	 *
+	 * If the space needed is less than one quarter of the existing size then
+	 * allocate twice the space needed. Otherwise use the existing size.
+	 *
+	 * \param existing The currently allocated size, in units of T.
+	 * \param needed The size required, in units of T.
+	 * \returns The new table size.
+	 */
+	static inline long downResize( long existing, long needed )
+		{ return EXPN_DOWN( existing, needed ); }
+};
+
+#undef EXPN_UP
+#undef EXPN_DOWN
+
+#ifdef AAPL_NAMESPACE
+}
+#endif
+
+#endif /* _AAPL_TABLE_H */
diff --git a/aapl/vector.h b/aapl/vector.h
new file mode 100644
index 0000000..c33e35b
--- /dev/null
+++ b/aapl/vector.h
@@ -0,0 +1,1202 @@
+/*
+ * Copyright 2002, 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _AAPL_VECTOR_H
+#define _AAPL_VECTOR_H
+
+#include <new>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "table.h"
+
+#ifdef AAPL_NAMESPACE
+namespace Aapl {
+#endif
+
+/**
+ * \addtogroup vector
+ * @{
+ */
+
+/** \class Vector
+ * \brief Dynamic array.
+ *
+ * This is a typical vector implementation. It is a dynamic array that can be
+ * used to contain complex data structures that have constructors and
+ * destructors as well as simple types such as integers and pointers.
+ *
+ * Vector supports inserting, overwriting, and removing single or multiple
+ * elements at once. Constructors and destructors are called wherever
+ * appropriate. For example, before an element is overwritten, its
+ * destructor is called.
+ *
+ * Vector provides automatic resizing of allocated memory as needed and offers
+ * different allocation schemes for controlling how the automatic allocation
+ * is done. Two senses of the length of the data are maintained: the
+ * amount of raw memory allocated to the vector and the number of actual
+ * elements in the vector. The various allocation schemes control how the
+ * allocated space is changed in relation to the number of elements in the
+ * vector.
+ *
+ * \include ex_vector.cpp
+ */
+
+/*@}*/
+
+template < class T, class Resize = ResizeExpn > class Vector
+ : public Table<T>, public Resize
+{
+private:
+ typedef Table<T> BaseTable;
+
+public:
+ /**
+ * \brief Initialize an empty vector with no space allocated.
+ *
+ * If a linear resizer is used, the step defaults to 256 units of T. For a
+ * runtime vector both up and down allocation schemes default to
+ * Exponential.
+ */
+ Vector() { }
+
+ /**
+ * \brief Create a vector that contains an initial element.
+ *
+ * The vector becomes one element in length. The element's copy
+ * constructor is used to place the value in the vector.
+ */
+ Vector(const T &val) { setAs(&val, 1); }
+
+ /**
+ * \brief Create a vector that contains an array of elements.
+ *
+ * The vector becomes len elements in length. Copy constructors are used
+ * to place the new elements in the vector.
+ */
+ Vector(const T *val, long len) { setAs(val, len); }
+
+ /* Deep copy. */
+ Vector( const Vector &v );
+
+ /* Free all mem used by the vector. */
+ ~Vector() { empty(); }
+
+ /* Delete all items. */
+ void empty();
+
+ /* Abandon the contents of the vector without deleting. */
+ void abandon();
+
+ /* Performs a shallow copy of another vector into this vector. If this
+ * vector is non-empty then its contents are lost (not freed). */
+ void shallowCopy( const Vector &v );
+
+ /* Perform a deep copy of another vector into this vector. */
+ Vector &operator=( const Vector &v );
+
+
+ /*@{*/
+ /**
+ * \brief Insert one element at position pos.
+ *
+ * Elements in the vector from pos onward are shifted one space to the
+ * right. The copy constructor is used to place the element into this
+ * vector. If pos is greater than the length of the vector then undefined
+ * behaviour results. If pos is negative then it is treated as an offset
+ * relative to the length of the vector.
+ */
+ void insert(long pos, const T &val) { insert(pos, &val, 1); }
+
+ /* Insert an array of values. */
+ void insert(long pos, const T *val, long len);
+
+ /**
+ * \brief Insert all the elements from another vector at position pos.
+ *
+ * Elements in this vector from pos onward are shifted v.tabLen spaces to
+ * the right. The element's copy constructor is used to copy the items
+ * into this vector. The other vector is left unchanged. If pos is off the
+ * end of the vector, then undefined behaviour results. If pos is negative
+ * then it is treated as an offset relative to the length of the vector.
+ * Equivalent to vector.insert(pos, other.data, other.tabLen).
+ */
+ void insert(long pos, const Vector &v) { insert(pos, v.data, v.tabLen); }
+
+ /* Insert len copies of val into the vector. */
+ void insertDup(long pos, const T &val, long len);
+
+ /**
+ * \brief Insert one new element using the default constructor.
+ *
+ * Elements in the vector from pos onward are shifted one space to the
+ * right. The default constructor is used to init the new element. If pos
+ * is greater than the length of the vector then undefined behaviour
+ * results. If pos is negative then it is treated as an offset relative to
+ * the length of the vector.
+ */
+ void insertNew(long pos) { insertNew(pos, 1); }
+
+ /* Insert len new items using default constructor. */
+ void insertNew(long pos, long len);
+ /*@}*/
+
+ /*@{*/
+ /**
+ * \brief Remove one element at position pos.
+ *
+ * The element's destructor is called. Elements to the right of pos are
+ * shifted one space to the left to take up the free space. If pos is greater
+ * than or equal to the length of the vector then undefined behavior results.
+ * If pos is negative then it is treated as an offset relative to the length
+ * of the vector.
+ */
+ void remove(long pos) { remove(pos, 1); }
+
+ /* Delete a number of elements. */
+ void remove(long pos, long len);
+ /*@}*/
+
+ /*@{*/
+ /**
+ * \brief Replace one element at position pos.
+ *
+ * If there is an existing element at position pos (if pos is less than
+ * the length of the vector) then its destructor is called before the
+ * space is used. The copy constructor is used to place the element into
+ * the vector. If pos is greater than the length of the vector then
+ * undefined behaviour results. If pos is negative then it is treated as
+ * an offset relative to the length of the vector.
+ */
+ void replace(long pos, const T &val) { replace(pos, &val, 1); }
+
+ /* Replace with an array of values. */
+ void replace(long pos, const T *val, long len);
+
+ /**
+ * \brief Replace at position pos with all the elements of another vector.
+ *
+ * Replace at position pos with all the elements of another vector. The
+ * other vector is left unchanged. If there are existing elements at the
+ * positions to be replaced, then destructors are called before the space
+ * is used. Copy constructors are used to place the elements into this
+ * vector. It is allowable for the pos and length of the other vector to
+ * specify a replacement that overwrites existing elements and creates new
+ * ones. If pos is greater than the length of the vector then undefined
+ * behaviour results. If pos is negative, then it is treated as an offset
+ * relative to the length of the vector.
+ */
+ void replace(long pos, const Vector &v) { replace(pos, v.data, v.tabLen); }
+
+ /* Replace len items with len copies of val. */
+ void replaceDup(long pos, const T &val, long len);
+
+ /**
+ * \brief Replace at position pos with one new element.
+ *
+ * If there is an existing element at the position to be replaced (pos is
+ * less than the length of the vector) then the element's destructor is
+ * called before the space is used. The default constructor is used to
+ * initialize the new element. If pos is greater than the length of the
+ * vector then undefined behaviour results. If pos is negative, then it is
+ * treated as an offset relative to the length of the vector.
+ */
+ void replaceNew(long pos) { replaceNew(pos, 1); }
+
+ /* Replace len items at pos with newly constructed objects. */
+ void replaceNew(long pos, long len);
+ /*@}*/
+
+ /*@{*/
+ /**
+ * \brief Set the contents of the vector to be val exactly.
+ *
+ * The vector becomes one element in length. Destructors are called on any
+ * existing elements in the vector. The element's copy constructor is used
+ * to place the val in the vector.
+ */
+ void setAs(const T &val) { setAs(&val, 1); }
+
+ /* Set to the contents of an array. */
+ void setAs(const T *val, long len);
+
+ /**
+ * \brief Set the vector to exactly the contents of another vector.
+ *
+ * The vector becomes v.tabLen elements in length. Destructors are called
+ * on any existing elements. Copy constructors are used to place the new
+ * elements in the vector.
+ */
+ void setAs(const Vector &v) { setAs(v.data, v.tabLen); }
+
+ /* Set as len copies of item. */
+ void setAsDup(const T &item, long len);
+
+ /**
+ * \brief Set the vector to exactly one new item.
+ *
+ * The vector becomes one element in length. Destructors are called on any
+ * existing elements in the vector. The default constructor is used to
+ * init the new item.
+ */
+ void setAsNew() { setAsNew(1); }
+
+ /* Set as newly constructed objects using the default constructor. */
+ void setAsNew(long len);
+ /*@}*/
+
+ /*@{*/
+ /**
+ * \brief Append one element to the end of the vector.
+ *
+ * Copy constructor is used to place the element in the vector.
+ */
+ void append(const T &val) { replace(BaseTable::tabLen, &val, 1); }
+
+ /**
+ * \brief Append len elements to the end of the vector.
+ *
+ * Copy constructors are used to place the elements in the vector.
+ */
+ void append(const T *val, long len) { replace(BaseTable::tabLen, val, len); }
+
+ /**
+ * \brief Append the contents of another vector.
+ *
+ * The other vector is left unchanged. Copy constructors are used to place the
+ * elements in the vector.
+ */
+ void append(const Vector &v) { replace(BaseTable::tabLen, v.data, v.tabLen); }
+
+ /**
+ * \brief Append len copies of item.
+ *
+ * The copy constructor is used to place the item in the vector.
+ */
+ void appendDup(const T &item, long len) { replaceDup(BaseTable::tabLen, item, len); }
+
+ /**
+ * \brief Append a single newly created item.
+ *
+ * The new element is initialized with the default constructor.
+ */
+ void appendNew() { replaceNew(BaseTable::tabLen, 1); }
+
+ /**
+ * \brief Append len newly created items.
+ *
+ * The new elements are initialized with the default constructor.
+ */
+ void appendNew(long len) { replaceNew(BaseTable::tabLen, len); }
+ /*@}*/
+
+ /*@{*/
+ /** \fn Vector::prepend(const T &val)
+ * \brief Prepend one element to the front of the vector.
+ *
+ * Copy constructor is used to place the element in the vector.
+ */
+ void prepend(const T &val) { insert(0, &val, 1); }
+
+ /**
+ * \brief Prepend len elements to the front of the vector.
+ *
+ * Copy constructors are used to place the elements in the vector.
+ */
+ void prepend(const T *val, long len) { insert(0, val, len); }
+
+ /**
+ * \brief Prepend the contents of another vector.
+ *
+ * The other vector is left unchanged. Copy constructors are used to place the
+ * elements in the vector.
+ */
+ void prepend(const Vector &v) { insert(0, v.data, v.tabLen); }
+
+ /**
+ * \brief Prepend len copies of item.
+ *
+ * The copy constructor is used to place the item in the vector.
+ */
+ void prependDup(const T &item, long len) { insertDup(0, item, len); }
+
+ /**
+ * \brief Prepend a single newly created item.
+ *
+ * The new element is initialized with the default constructor.
+ */
+ void prependNew() { insertNew(0, 1); }
+
+ /**
+ * \brief Prepend len newly created items.
+ *
+ * The new elements are initialized with the default constructor.
+ */
+ void prependNew(long len) { insertNew(0, len); }
+ /*@}*/
+
+ /* Convenience access. */
+ T &operator[](int i) const { return BaseTable::data[i]; }
+ long size() const { return BaseTable::tabLen; }
+
+ /* Forward this so a ref can be used. */
+ struct Iter;
+
+ /* Various classes for setting the iterator */
+ struct IterFirst { IterFirst( const Vector &v ) : v(v) { } const Vector &v; };
+ struct IterLast { IterLast( const Vector &v ) : v(v) { } const Vector &v; };
+ struct IterNext { IterNext( const Iter &i ) : i(i) { } const Iter &i; };
+ struct IterPrev { IterPrev( const Iter &i ) : i(i) { } const Iter &i; };
+
+ /**
+ * \brief Vector Iterator.
+ * \ingroup iterators
+ */
+ struct Iter
+ {
+ /* The iterator is a current pointer plus two sentinels: for a non-empty
+ * vector ptrBeg is one slot before the first element and ptrEnd is one
+ * slot past the last element. An iterator made from an empty vector, or
+ * default constructed, has all three pointers null. */
+
+ /* Construct, assign. */
+ Iter() : ptr(0), ptrBeg(0), ptrEnd(0) { }
+
+ /* Construct. */
+ Iter( const Vector &v );
+ Iter( const IterFirst &vf );
+ Iter( const IterLast &vl );
+ inline Iter( const IterNext &vn );
+ inline Iter( const IterPrev &vp );
+
+ /* Assign. */
+ Iter &operator=( const Vector &v );
+ Iter &operator=( const IterFirst &vf );
+ Iter &operator=( const IterLast &vl );
+ inline Iter &operator=( const IterNext &vf );
+ inline Iter &operator=( const IterPrev &vl );
+
+ /** \brief Less than end? True while ptr has not reached ptrEnd. */
+ bool lte() const { return ptr != ptrEnd; }
+
+ /** \brief At end? True once ptr equals ptrEnd. */
+ bool end() const { return ptr == ptrEnd; }
+
+ /** \brief Greater than beginning? True while ptr has not reached ptrBeg. */
+ bool gtb() const { return ptr != ptrBeg; }
+
+ /** \brief At beginning? True once ptr equals ptrBeg. */
+ bool beg() const { return ptr == ptrBeg; }
+
+ /** \brief At first element? */
+ bool first() const { return ptr == ptrBeg+1; }
+
+ /** \brief At last element? */
+ bool last() const { return ptr == ptrEnd-1; }
+
+ /* Return the position: the offset of ptr from the first element, which
+ * lies one slot after ptrBeg. */
+ long pos() const { return ptr - ptrBeg - 1; }
+ /* Index relative to the current position, not to the start of the
+ * vector. No bounds checking is done. */
+ T &operator[](int i) const { return ptr[i]; }
+
+ /** \brief Implicit cast to T*. */
+ operator T*() const { return ptr; }
+
+ /** \brief Dereference operator returns T&. */
+ T &operator *() const { return *ptr; }
+
+ /** \brief Arrow operator returns T*. */
+ T *operator->() const { return ptr; }
+
+ /** \brief Move to next item. Returns the new position. */
+ T *operator++() { return ++ptr; }
+
+ /** \brief Move to next item. Returns the position before the move. */
+ T *operator++(int) { return ptr++; }
+
+ /** \brief Move to next item. Returns the new position. */
+ T *increment() { return ++ptr; }
+
+ /** \brief Move n items forward. */
+ T *operator+=(long n) { return ptr+=n; }
+
+ /** \brief Move to previous item. Returns the new position. */
+ T *operator--() { return --ptr; }
+
+ /** \brief Move to previous item. Returns the position before the move. */
+ T *operator--(int) { return ptr--; }
+
+ /** \brief Move to previous item. Returns the new position. */
+ T *decrement() { return --ptr; }
+
+ /** \brief Move n items back. */
+ T *operator-=(long n) { return ptr-=n; }
+
+ /** \brief Return the next item. Does not modify this. */
+ inline IterNext next() const { return IterNext(*this); }
+
+ /** \brief Return the previous item. Does not modify this. */
+ inline IterPrev prev() const { return IterPrev(*this); }
+
+ /** \brief The iterator is simply a pointer. */
+ T *ptr;
+
+ /* For testing endpoints. */
+ T *ptrBeg, *ptrEnd;
+ };
+
+ /** \brief Return first element. */
+ IterFirst first() { return IterFirst( *this ); }
+
+ /** \brief Return last element. */
+ IterLast last() { return IterLast( *this ); }
+
+protected:
+ void makeRawSpaceFor(long pos, long len);
+
+ void upResize(long len);
+ void downResize(long len);
+};
+
+#if 0
+/* Create a vector with an initial number of elements and size. */
+template<class T, class Resize> Vector<T, Resize>::
+ Vector( long size, long allocLen )
+{
+ /* Allocate the space if we are given a positive allocLen. */
+ BaseTable::allocLen = allocLen;
+ if ( allocLen > 0 ) {
+ BaseTable::data = (T*) malloc(sizeof(T) * BaseTable::allocLen);
+ if ( BaseTable::data == 0 )
+ throw std::bad_alloc();
+ }
+
+ /* Grow to the size specified. If we did not have enough space
+ * allocated that is ok. Table will be grown to right size. */
+ setAsNew( size );
+}
+#endif
+
+/* Init a vector iterator with just a vector. */
+template <class T, class Resize> Vector<T, Resize>::Iter::Iter( const Vector &v )
+{
+	if ( v.tabLen != 0 ) {
+		/* Start on the first element; the sentinels sit one slot before
+		 * the first element and one slot past the last. */
+		ptrBeg = v.data - 1;
+		ptrEnd = v.data + v.tabLen;
+		ptr = v.data;
+	}
+	else {
+		/* Nothing to iterate over: every pointer is null. */
+		ptr = 0;
+		ptrBeg = 0;
+		ptrEnd = 0;
+	}
+}
+
+/* Init a vector iterator with the first of a vector. */
+template <class T, class Resize> Vector<T, Resize>::Iter::Iter(
+		const IterFirst &vf )
+{
+	const Vector &vec = vf.v;
+	if ( vec.tabLen != 0 ) {
+		/* Start on the first element of the vector. */
+		ptrBeg = vec.data - 1;
+		ptrEnd = vec.data + vec.tabLen;
+		ptr = vec.data;
+	}
+	else {
+		/* Nothing to iterate over: every pointer is null. */
+		ptr = 0;
+		ptrBeg = 0;
+		ptrEnd = 0;
+	}
+}
+
+/* Init a vector iterator with the last of a vector. */
+template <class T, class Resize> Vector<T, Resize>::Iter::Iter(
+		const IterLast &vl )
+{
+	const Vector &vec = vl.v;
+	if ( vec.tabLen != 0 ) {
+		/* Start on the last element of the vector. */
+		ptrBeg = vec.data - 1;
+		ptrEnd = vec.data + vec.tabLen;
+		ptr = vec.data + vec.tabLen - 1;
+	}
+	else {
+		/* Nothing to iterate over: every pointer is null. */
+		ptr = 0;
+		ptrBeg = 0;
+		ptrEnd = 0;
+	}
+}
+
+/* Init a vector iterator with the next of some other iterator. */
+template <class T, class Resize> Vector<T, Resize>::Iter::Iter(
+		const IterNext &vn )
+{
+	/* Same endpoints as the source iterator, one item further along. */
+	const Iter &from = vn.i;
+	ptr = from.ptr + 1;
+	ptrBeg = from.ptrBeg;
+	ptrEnd = from.ptrEnd;
+}
+
+/* Init a vector iterator with the prev of some other iterator. */
+template <class T, class Resize> Vector<T, Resize>::Iter::Iter(
+		const IterPrev &vp )
+{
+	/* Same endpoints as the source iterator, one item further back. */
+	const Iter &from = vp.i;
+	ptr = from.ptr - 1;
+	ptrBeg = from.ptrBeg;
+	ptrEnd = from.ptrEnd;
+}
+
+/* Set a vector iterator with some vector. */
+template <class T, class Resize> typename Vector<T, Resize>::Iter &
+	Vector<T, Resize>::Iter::operator=( const Vector &v )
+{
+	/* An empty vector gives an all-null iterator; otherwise point at the
+	 * first element with sentinels on either side of the data. */
+	const bool isEmpty = ( v.tabLen == 0 );
+	ptr = isEmpty ? (T*)0 : v.data;
+	ptrBeg = isEmpty ? (T*)0 : v.data - 1;
+	ptrEnd = isEmpty ? (T*)0 : v.data + v.tabLen;
+	return *this;
+}
+
+/* Set a vector iterator with the first element in a vector. */
+template <class T, class Resize> typename Vector<T, Resize>::Iter &
+	Vector<T, Resize>::Iter::operator=( const IterFirst &vf )
+{
+	/* An empty vector gives an all-null iterator; otherwise point at the
+	 * first element with sentinels on either side of the data. */
+	const Vector &vec = vf.v;
+	const bool isEmpty = ( vec.tabLen == 0 );
+	ptr = isEmpty ? (T*)0 : vec.data;
+	ptrBeg = isEmpty ? (T*)0 : vec.data - 1;
+	ptrEnd = isEmpty ? (T*)0 : vec.data + vec.tabLen;
+	return *this;
+}
+
+/* Set a vector iterator with the last element in a vector. */
+template <class T, class Resize> typename Vector<T, Resize>::Iter &
+	Vector<T, Resize>::Iter::operator=( const IterLast &vl )
+{
+	/* An empty vector gives an all-null iterator; otherwise point at the
+	 * last element with sentinels on either side of the data. */
+	const Vector &vec = vl.v;
+	const bool isEmpty = ( vec.tabLen == 0 );
+	ptr = isEmpty ? (T*)0 : vec.data + vec.tabLen - 1;
+	ptrBeg = isEmpty ? (T*)0 : vec.data - 1;
+	ptrEnd = isEmpty ? (T*)0 : vec.data + vec.tabLen;
+	return *this;
+}
+
+/* Set a vector iterator with the next of some other iterator. */
+template <class T, class Resize> typename Vector<T, Resize>::Iter &
+	Vector<T, Resize>::Iter::operator=( const IterNext &vn )
+{
+	/* Take the source iterator's endpoints and step one item forward. Each
+	 * field depends only on the same field of the source, so this is also
+	 * correct when the source is this iterator. */
+	const Iter &from = vn.i;
+	ptrBeg = from.ptrBeg;
+	ptrEnd = from.ptrEnd;
+	ptr = from.ptr + 1;
+	return *this;
+}
+
+/* Set a vector iterator with the prev of some other iterator. */
+template <class T, class Resize> typename Vector<T, Resize>::Iter &
+	Vector<T, Resize>::Iter::operator=( const IterPrev &vp )
+{
+	/* Take the source iterator's endpoints and step one item back. Each
+	 * field depends only on the same field of the source, so this is also
+	 * correct when the source is this iterator. */
+	const Iter &from = vp.i;
+	ptrBeg = from.ptrBeg;
+	ptrEnd = from.ptrEnd;
+	ptr = from.ptr - 1;
+	return *this;
+}
+
+/**
+ * \brief Forget all elements in the vector.
+ *
+ * The vector is reset to the empty state without running any destructors and
+ * without freeing the allocated space.
+ */
+template<class T, class Resize> void Vector<T, Resize>::
+	abandon()
+{
+	BaseTable::tabLen = BaseTable::allocLen = 0;
+	BaseTable::data = 0;
+}
+
+/**
+ * \brief Shallow copy another vector into this vector.
+ *
+ * The dynamic array of the other vector is copied into this vector by
+ * reference. If this vector is non-empty then its contents are lost. This
+ * routine must be used with care. After a shallow copy one vector should
+ * abandon its contents to prevent both destructors from attempting to free
+ * the common array.
+ */
+template<class T, class Resize> void Vector<T, Resize>::
+	shallowCopy( const Vector &v )
+{
+	/* Take over the other vector's array by reference. No elements are
+	 * copied, and whatever this vector referred to before is forgotten
+	 * without being freed. */
+	BaseTable::allocLen = v.allocLen;
+	BaseTable::tabLen = v.tabLen;
+	BaseTable::data = v.data;
+}
+
+/**
+ * \brief Deep copy another vector into this vector.
+ *
+ * Copies the entire contents of the other vector into this vector. Any
+ * existing contents are first deleted. Equivalent to setAs, except that
+ * assigning a vector to itself leaves it unchanged.
+ *
+ * \param v The vector to copy from.
+ * \returns A reference to this.
+ */
+template<class T, class Resize> Vector<T, Resize> &Vector<T, Resize>::
+	operator=( const Vector &v )
+{
+	/* setAs destroys the current elements before it copies from the source
+	 * array, so on self-assignment it would copy from destroyed objects. */
+	if ( this != &v )
+		setAs(v.data, v.tabLen);
+	return *this;
+}
+
+/* Up resize the data for len elements using Resize::upResize to tell us the
+ * new allocLen. Reads and writes allocLen. Does not read or write tabLen.
+ * Throws std::bad_alloc when the memory cannot be obtained; in that case data
+ * and allocLen are left exactly as they were, so the existing array stays
+ * valid and is not leaked. */
+template<class T, class Resize> void Vector<T, Resize>::
+	upResize(long len)
+{
+	/* Ask the resizer what the new allocLen will be. */
+	long newLen = Resize::upResize(BaseTable::allocLen, len);
+
+	/* Did the data grow? */
+	if ( newLen > BaseTable::allocLen ) {
+		/* realloc on a null pointer behaves like malloc, so a single call
+		 * covers both creating the table and growing an existing one. The
+		 * result goes into a temporary first: on failure realloc returns
+		 * null but leaves the old block allocated, and overwriting data
+		 * with that null would lose the only pointer to it. */
+		T *newData = (T*) realloc( BaseTable::data, sizeof(T) * newLen );
+		if ( newData == 0 )
+			throw std::bad_alloc();
+
+		/* Only commit the new state once the allocation has succeeded. */
+		BaseTable::data = newData;
+		BaseTable::allocLen = newLen;
+	}
+}
+
+/* Down resize the data for len elements using Resize::downResize to determine
+ * the new allocLen. Reads and writes allocLen. Does not read or write tabLen.
+ * If the allocator cannot produce the smaller block, the existing (larger)
+ * allocation is kept and allocLen is left unchanged; the table still has room
+ * for len elements, so this is not reported as an error. */
+template<class T, class Resize> void Vector<T, Resize>::
+	downResize(long len)
+{
+	/* Ask the resizer what the new allocLen will be. */
+	long newLen = Resize::downResize( BaseTable::allocLen, len );
+
+	/* Did the data shrink? */
+	if ( newLen < BaseTable::allocLen ) {
+		if ( newLen == 0 ) {
+			/* Simply free the data. */
+			free( BaseTable::data );
+			BaseTable::data = 0;
+			BaseTable::allocLen = 0;
+		}
+		else {
+			/* Not shrinking to size zero, realloc it to the smaller size.
+			 * The result goes into a temporary: when realloc fails it
+			 * returns null and leaves the old block allocated, so data must
+			 * not be overwritten with the null. */
+			T *newData = (T*) realloc( BaseTable::data, sizeof(T) * newLen );
+			if ( newData != 0 ) {
+				BaseTable::data = newData;
+				BaseTable::allocLen = newLen;
+			}
+		}
+	}
+}
+
+/**
+ * \brief Perform a deep copy of the vector.
+ *
+ * The contents of the other vector are copied into this vector. This vector
+ * gets the same allocation size as the other vector. All items are copied
+ * using the element's copy constructor.
+ */
+template<class T, class Resize> Vector<T, Resize>::
+	Vector(const Vector<T, Resize> &v)
+{
+	/* Mirror the other vector's bookkeeping, including its allocation size. */
+	BaseTable::allocLen = v.allocLen;
+	BaseTable::tabLen = v.tabLen;
+
+	/* Nothing allocated in the source means nothing to allocate here. */
+	if ( BaseTable::allocLen <= 0 ) {
+		BaseTable::data = 0;
+		return;
+	}
+
+	/* Get raw space for the same number of items the source has room for. */
+	BaseTable::data = (T*) malloc(sizeof(T) * BaseTable::allocLen);
+	if ( BaseTable::data == 0 )
+		throw std::bad_alloc();
+
+	/* Copy-construct each item of the source into the new space. */
+	T *from = v.data;
+	T *to = BaseTable::data;
+	for ( long done = 0; done < BaseTable::tabLen; done++ ) {
+		new(to) T(*from);
+		to++;
+		from++;
+	}
+}
+
+/** \fn Vector::~Vector()
+ * \brief Free all memory used by the vector.
+ *
+ * The vector is reset to zero elements. Destructors are called on all
+ * elements in the vector. The space allocated for the vector is freed.
+ */
+
+
+/**
+ * \brief Free all memory used by the vector.
+ *
+ * The vector is reset to zero elements. Destructors are called on all
+ * elements in the vector. The space allocated for the vector is freed.
+ */
+template<class T, class Resize> void Vector<T, Resize>::
+	empty()
+{
+	/* Nothing allocated, nothing to do. */
+	if ( BaseTable::data == 0 )
+		return;
+
+	/* Run the destructor of every element. */
+	T *const items = BaseTable::data;
+	for ( long idx = 0; idx < BaseTable::tabLen; idx++ )
+		items[idx].~T();
+
+	/* Release the storage and return to the empty state. */
+	free( BaseTable::data );
+	BaseTable::allocLen = 0;
+	BaseTable::tabLen = 0;
+	BaseTable::data = 0;
+}
+
+/**
+ * \brief Set the contents of the vector to be len elements exactly.
+ *
+ * The vector becomes len elements in length. Destructors are called on any
+ * existing elements in the vector. Copy constructors are used to place the
+ * new elements in the vector.
+ *
+ * val must not point into this vector's own storage: the existing elements
+ * are destroyed, and the storage may be reallocated, before val is read.
+ *
+ * If the resize or an element's copy constructor throws, the length of the
+ * vector covers only the elements that were fully constructed.
+ */
+template<class T, class Resize> void Vector<T, Resize>::
+	setAs(const T *val, long len)
+{
+	/* Call All destructors. */
+	long i, oldLen = BaseTable::tabLen;
+	T *pos = BaseTable::data;
+	for ( i = 0; i < oldLen; pos++, i++ )
+		pos->~T();
+
+	/* No live elements remain. Record that before anything below can throw
+	 * so the length never counts objects that were already destroyed. */
+	BaseTable::tabLen = 0;
+
+	/* Adjust the allocated length. */
+	if ( len < oldLen )
+		downResize( len );
+	else if ( len > oldLen )
+		upResize( len );
+
+	/* Copy data in. The length is advanced one element at a time so that it
+	 * only ever counts elements whose construction has completed; it ends
+	 * up at exactly len. */
+	T *dst = BaseTable::data;
+	const T *src = val;
+	for ( i = 0; i < len; i++, dst++, src++ ) {
+		new(dst) T(*src);
+		BaseTable::tabLen = i + 1;
+	}
+}
+
+/**
+ * \brief Set the vector to len copies of item.
+ *
+ * The vector becomes len elements in length. Destructors are called on any
+ * existing elements in the vector. The element's copy constructor is used to
+ * copy the item into the vector.
+ *
+ * item must not refer to an element of this vector: the existing elements
+ * are destroyed, and the storage may be reallocated, before item is copied.
+ *
+ * If the resize or the copy constructor throws, the length of the vector
+ * covers only the elements that were fully constructed.
+ */
+template<class T, class Resize> void Vector<T, Resize>::
+	setAsDup(const T &item, long len)
+{
+	/* Call All destructors. */
+	long oldLen = BaseTable::tabLen;
+	T *pos = BaseTable::data;
+	for ( long i = 0; i < oldLen; pos++, i++ )
+		pos->~T();
+
+	/* No live elements remain. Record that before anything below can throw
+	 * so the length never counts objects that were already destroyed. */
+	BaseTable::tabLen = 0;
+
+	/* Adjust the allocated length. */
+	if ( len < oldLen )
+		downResize( len );
+	else if ( len > oldLen )
+		upResize( len );
+
+	/* Copy item in one spot at a time. The length is advanced with each
+	 * completed construction and ends up at exactly len. */
+	T *dst = BaseTable::data;
+	for ( long i = 0; i < len; i++, dst++ ) {
+		new(dst) T(item);
+		BaseTable::tabLen = i + 1;
+	}
+}
+
+/**
+ * \brief Set the vector to exactly len new items.
+ *
+ * The vector becomes len elements in length. Destructors are called on any
+ * existing elements in the vector. Default constructors are used to init the
+ * new items.
+ *
+ * If the resize or the default constructor throws, the length of the vector
+ * covers only the elements that were fully constructed.
+ */
+template<class T, class Resize> void Vector<T, Resize>::
+	setAsNew(long len)
+{
+	/* Call All destructors. */
+	long oldLen = BaseTable::tabLen;
+	T *pos = BaseTable::data;
+	for ( long i = 0; i < oldLen; pos++, i++ )
+		pos->~T();
+
+	/* No live elements remain. Record that before anything below can throw
+	 * so the length never counts objects that were already destroyed. */
+	BaseTable::tabLen = 0;
+
+	/* Adjust the allocated length. */
+	if ( len < oldLen )
+		downResize( len );
+	else if ( len > oldLen )
+		upResize( len );
+
+	/* Create items using default constructor. The length is advanced with
+	 * each completed construction and ends up at exactly len. */
+	T *dst = BaseTable::data;
+	for ( long i = 0; i < len; i++, dst++ ) {
+		new(dst) T();
+		BaseTable::tabLen = i + 1;
+	}
+}
+
+
+/**
+ * \brief Replace len elements at position pos.
+ *
+ * Existing elements in the range being written are destructed before their
+ * space is reused, and the new elements are put in place with the copy
+ * constructor. The range may run past the current end of the vector, in
+ * which case the vector is extended to cover it. If pos is greater than the
+ * length of the vector then undefined behaviour results. If pos is negative,
+ * it is treated as an offset relative to the length of the vector.
+ */
+template<class T, class Resize> void Vector<T, Resize>::
+		replace(long pos, const T *val, long len)
+{
+	/* A negative position counts back from the end of the vector. */
+	if ( pos < 0 )
+		pos += BaseTable::tabLen;
+
+	/* One past the last slot that will be written. */
+	const long endPos = pos + len;
+	const bool extends = endPos > BaseTable::tabLen;
+
+	/* When writing past the current end, get the space first. The data
+	 * pointer is only read after this call. */
+	if ( extends )
+		upResize( endPos );
+
+	/* Destruct the live elements that are about to be overwritten: up to the
+	 * current length when extending, otherwise up to endPos. */
+	const long liveEnd = extends ? BaseTable::tabLen : endPos;
+	T *victim = BaseTable::data + pos;
+	for ( long i = pos; i < liveEnd; i++, victim++ )
+		victim->~T();
+
+	/* Extending the vector changes its length. */
+	if ( extends )
+		BaseTable::tabLen = endPos;
+
+	/* Copy construct the incoming elements into place. */
+	T *dst = BaseTable::data + pos;
+	for ( long i = 0; i < len; i++ )
+		new(dst + i) T(val[i]);
+}
+
+/**
+ * \brief Replace at position pos with len copies of an item.
+ *
+ * Existing elements in the range being written are destructed before their
+ * space is reused, and each slot is then initialised from val with the copy
+ * constructor. The range may run past the current end of the vector, in
+ * which case the vector is extended to cover it. If pos is greater than the
+ * length of the vector then undefined behaviour results. If pos is negative,
+ * it is treated as an offset relative to the length of the vector.
+ */
+template<class T, class Resize> void Vector<T, Resize>::
+		replaceDup(long pos, const T &val, long len)
+{
+	/* A negative position counts back from the end of the vector. */
+	if ( pos < 0 )
+		pos += BaseTable::tabLen;
+
+	/* One past the last slot that will be written. */
+	const long endPos = pos + len;
+	const bool extends = endPos > BaseTable::tabLen;
+
+	/* When writing past the current end, get the space first. The data
+	 * pointer is only read after this call. */
+	if ( extends )
+		upResize( endPos );
+
+	/* Destruct the live elements that are about to be overwritten: up to the
+	 * current length when extending, otherwise up to endPos. */
+	const long liveEnd = extends ? BaseTable::tabLen : endPos;
+	T *victim = BaseTable::data + pos;
+	for ( long i = pos; i < liveEnd; i++, victim++ )
+		victim->~T();
+
+	/* Extending the vector changes its length. */
+	if ( extends )
+		BaseTable::tabLen = endPos;
+
+	/* Copy construct val into every slot of the range. */
+	T *dst = BaseTable::data + pos;
+	for ( long left = len; left > 0; left--, dst++ )
+		new(dst) T(val);
+}
+
+/**
+ * \brief Replace at position pos with len new elements.
+ *
+ * Existing elements in the range being written are destructed before their
+ * space is reused, and each slot is then initialised with the default
+ * constructor. The range may run past the current end of the vector, in
+ * which case the vector is extended to cover it. If pos is greater than the
+ * length of the vector then undefined behaviour results. If pos is negative,
+ * it is treated as an offset relative to the length of the vector.
+ */
+template<class T, class Resize> void Vector<T, Resize>::
+		replaceNew(long pos, long len)
+{
+	/* A negative position counts back from the end of the vector. */
+	if ( pos < 0 )
+		pos += BaseTable::tabLen;
+
+	/* One past the last slot that will be written. */
+	const long endPos = pos + len;
+	const bool extends = endPos > BaseTable::tabLen;
+
+	/* When writing past the current end, get the space first. The data
+	 * pointer is only read after this call. */
+	if ( extends )
+		upResize( endPos );
+
+	/* Destruct the live elements that are about to be overwritten: up to the
+	 * current length when extending, otherwise up to endPos. */
+	const long liveEnd = extends ? BaseTable::tabLen : endPos;
+	T *victim = BaseTable::data + pos;
+	for ( long i = pos; i < liveEnd; i++, victim++ )
+		victim->~T();
+
+	/* Extending the vector changes its length. */
+	if ( extends )
+		BaseTable::tabLen = endPos;
+
+	/* Default construct every slot of the range. */
+	T *dst = BaseTable::data + pos;
+	for ( long left = len; left > 0; left--, dst++ )
+		new(dst) T();
+}
+
+/**
+ * \brief Remove len elements at position pos.
+ *
+ * The destructor is called on every removed element. Elements to the right
+ * of the removed range are shifted len places to the left to close the gap.
+ * If pos is greater than or equal to the length of the vector then undefined
+ * behaviour results. If pos is negative then it is treated as an offset
+ * relative to the length of the vector.
+ */
+template<class T, class Resize> void Vector<T, Resize>::
+		remove(long pos, long len)
+{
+	/* A negative position counts back from the end of the vector. */
+	if ( pos < 0 )
+		pos += BaseTable::tabLen;
+
+	/* First slot of the range being removed. */
+	T *const gap = BaseTable::data + pos;
+
+	/* Destruct each element in the range. */
+	for ( long i = 0; i < len; i++ )
+		(gap + i)->~T();
+
+	/* Number of elements after the removed range. They are moved down as raw
+	 * bytes; no constructors or destructors run for them. */
+	const long tail = BaseTable::tabLen - (pos + len);
+	if ( len > 0 && tail > 0 )
+		memmove( gap, gap + len, sizeof(T)*tail );
+
+	/* Shrink the allocation if necessary, then record the new length. */
+	const long newLen = BaseTable::tabLen - len;
+	downResize( newLen );
+	BaseTable::tabLen = newLen;
+}
+
+/**
+ * \brief Insert len elements at position pos.
+ *
+ * Elements from pos onward are shifted len places to the right and the new
+ * elements are copy constructed into the opening. If pos is greater than the
+ * length of the vector then undefined behaviour results. If pos is negative
+ * then it is treated as an offset relative to the length of the vector.
+ */
+template<class T, class Resize> void Vector<T, Resize>::
+		insert(long pos, const T *val, long len)
+{
+	/* A negative position counts back from the end of the vector. */
+	if ( pos < 0 )
+		pos += BaseTable::tabLen;
+
+	/* Length after the insertion; make room for it before touching data. */
+	const long newLen = BaseTable::tabLen + len;
+	upResize( newLen );
+
+	/* Open a gap of len slots at pos by moving the tail up as raw bytes.
+	 * Nothing needs to move when appending or when len is not positive. */
+	T *const hole = BaseTable::data + pos;
+	if ( len > 0 && pos < BaseTable::tabLen )
+		memmove( hole + len, hole, sizeof(T)*(BaseTable::tabLen-pos) );
+
+	/* Copy construct the incoming elements into the gap. */
+	for ( long i = 0; i < len; i++ )
+		new(hole + i) T(val[i]);
+
+	/* Record the new length. */
+	BaseTable::tabLen = newLen;
+}
+
+/**
+ * \brief Insert len copies of item at position pos.
+ *
+ * Elements from pos onward are shifted len places to the right and each slot
+ * of the opening is copy constructed from item. If pos is greater than the
+ * length of the vector then undefined behaviour results. If pos is negative
+ * then it is treated as an offset relative to the length of the vector.
+ */
+template<class T, class Resize> void Vector<T, Resize>::
+		insertDup(long pos, const T &item, long len)
+{
+	/* A negative position counts back from the end of the vector. */
+	if ( pos < 0 )
+		pos += BaseTable::tabLen;
+
+	/* Length after the insertion; make room for it before touching data. */
+	const long newLen = BaseTable::tabLen + len;
+	upResize( newLen );
+
+	/* Open a gap of len slots at pos by moving the tail up as raw bytes.
+	 * Nothing needs to move when appending or when len is not positive. */
+	T *const hole = BaseTable::data + pos;
+	if ( len > 0 && pos < BaseTable::tabLen )
+		memmove( hole + len, hole, sizeof(T)*(BaseTable::tabLen-pos) );
+
+	/* Copy construct item into every slot of the gap. */
+	T *slot = hole;
+	for ( long left = len; left > 0; left--, slot++ )
+		new(slot) T(item);
+
+	/* Record the new length. */
+	BaseTable::tabLen = newLen;
+}
+
+/**
+ * \brief Insert len new elements using the default constructor.
+ *
+ * Elements from pos onward are shifted len places to the right and each slot
+ * of the opening is default constructed. If pos is off the end of the vector
+ * then undefined behaviour results. If pos is negative then it is treated as
+ * an offset relative to the length of the vector.
+ */
+template<class T, class Resize> void Vector<T, Resize>::
+		insertNew(long pos, long len)
+{
+	/* A negative position counts back from the end of the vector. */
+	if ( pos < 0 )
+		pos += BaseTable::tabLen;
+
+	/* Length after the insertion; make room for it before touching data. */
+	const long newLen = BaseTable::tabLen + len;
+	upResize( newLen );
+
+	/* Open a gap of len slots at pos by moving the tail up as raw bytes.
+	 * Nothing needs to move when appending or when len is not positive. */
+	T *const hole = BaseTable::data + pos;
+	if ( len > 0 && pos < BaseTable::tabLen )
+		memmove( hole + len, hole, sizeof(T)*(BaseTable::tabLen-pos) );
+
+	/* Default construct every slot of the gap. */
+	T *slot = hole;
+	for ( long left = len; left > 0; left--, slot++ )
+		new(slot) T();
+
+	/* Record the new length. */
+	BaseTable::tabLen = newLen;
+}
+
+/* Opens a gap of len uninitialised slots at pos and updates the length of the
+ * vector. The slots are not constructed in any way. Unlike the insert
+ * routines, a negative pos is not translated here. If pos is greater than the
+ * length of the vector then undefined behaviour results. */
+template<class T, class Resize> void Vector<T, Resize>::
+		makeRawSpaceFor(long pos, long len)
+{
+	/* Length after the gap is opened; make room for it first. */
+	const long newLen = BaseTable::tabLen + len;
+	upResize( newLen );
+
+	/* Move the tail up as raw bytes. Nothing needs to move when the gap is at
+	 * the end or when len is not positive. */
+	if ( len > 0 && pos < BaseTable::tabLen ) {
+		T *const hole = BaseTable::data + pos;
+		memmove( hole + len, hole, sizeof(T)*(BaseTable::tabLen-pos) );
+	}
+
+	/* Record the new length. */
+	BaseTable::tabLen = newLen;
+}
+
+#ifdef AAPL_NAMESPACE
+}
+#endif
+
+#endif /* _AAPL_VECTOR_H */
diff --git a/common/Makefile.in b/common/Makefile.in
new file mode 100644
index 0000000..a08ce60
--- /dev/null
+++ b/common/Makefile.in
@@ -0,0 +1,71 @@
+#
+# Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+#
+
+# This file is part of Ragel.
+#
+# Ragel is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Ragel is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ragel; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# Preprocessor include paths and defines passed to every compile.
+INCS = -Iaapl
+DEFS =
+
+# Compiler and linker flags.
+CFLAGS = -g -Wall
+LDFLAGS =
+
+SUBDIRS = codegen test examples doc
+
+# NOTE(review): CC_SRCS is not referenced by any rule or variable below in
+# this file -- confirm whether it is still needed here.
+CC_SRCS = \
+ rlparse.cpp rlscan.cpp main.cpp parsetree.cpp \
+ parsedata.cpp fsmstate.cpp fsmbase.cpp fsmattach.cpp \
+ fsmmin.cpp fsmgraph.cpp fsmap.cpp xmlcodegen.cpp
+
+# Generated sources; built by the default target.
+GEN_SRC = version.h
+
+# Values substituted by configure.
+LIBS = @LIBS@
+PREFIX = @prefix@
+
+#*************************************
+
+# The version.h rule below uses VERSION and PUBDATE; presumably they are
+# defined in version.mk -- TODO confirm.
+include ../version.mk
+
+# Programs
+CXX = @CXX@
+
+# What kind of header does bison put out?
+BISON_HEAD_SUFFIX = @BISON_HEAD_SUFFIX@
+
+# Get objects and dependencies from sources.
+# NOTE(review): RAGEL_CC_SRCS and RLCG_CC_SRCS are not defined in this file.
+# Unless version.mk defines them, these three variables expand to nothing and
+# the -include of $(DEPS) includes no files -- confirm.
+RAGEL_OBJS = $(RAGEL_CC_SRCS:%.cpp=%.o)
+RLCG_OBJS = $(RLCG_CC_SRCS:%.cpp=%.o)
+DEPS = $(RAGEL_CC_SRCS:%.cpp=.%.d) $(RLCG_CC_SRCS:%.cpp=.%.d)
+
+# Rules.
+
+# all, clean and distclean name actions, not files. Declaring them phony keeps
+# them working even if a file with one of those names appears in this
+# directory.
+.PHONY: all clean distclean
+
+# Default target: only the generated sources are built in this directory.
+all: $(GEN_SRC)
+
+# Regenerate the version header whenever version.mk changes.
+version.h: ../version.mk
+	echo '#define VERSION "$(VERSION)"' > version.h
+	echo '#define PUBDATE "$(PUBDATE)"' >> version.h
+
+# Compile a source file, first writing its dependency list to .<name>.d so
+# that the -include at the bottom can pick it up on later runs.
+%.o: %.cpp
+	@$(CXX) -M $(DEFS) $(INCS) $< > .$*.d
+	$(CXX) -c $(CFLAGS) $(DEFS) $(INCS) -o $@ $<
+
+# Remove everything clean removes, plus the files produced by configure.
+distclean: clean
+	rm -f Makefile config.h
+
+clean:
+	rm -f tags .*.d *.o version.h
+
+# Dependency files may not exist yet; the leading '-' ignores missing ones.
+-include $(DEPS)
diff --git a/common/buffer.h b/common/buffer.h
new file mode 100644
index 0000000..99c4e82
--- /dev/null
+++ b/common/buffer.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2003 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _BUFFER_H
+#define _BUFFER_H
+
+/* Number of bytes a Buffer allocates on construction. */
+#define BUFFER_INITIAL_SIZE 4096
+
+/* An automatically grown buffer for collecting tokens. Always reuses space;
+ * never down resizes. The storage is owned through a raw pointer and no copy
+ * operations are defined, so a Buffer must not be copied (both copies would
+ * free the same block). */
+struct Buffer
+{
+	/* Allocate the initial storage. The buffer starts out empty. Terminates
+	 * the program with abort() if the allocation fails. */
+	Buffer()
+	{
+		data = (char*) malloc( BUFFER_INITIAL_SIZE );
+		if ( data == 0 )
+			abort();
+		allocated = BUFFER_INITIAL_SIZE;
+		length = 0;
+	}
+	~Buffer() { free(data); }
+
+	/* Append one character, doubling the allocation when it is full.
+	 * Terminates the program with abort() if the storage cannot be grown. */
+	void append( char p )
+	{
+		if ( length == allocated ) {
+			/* Grow through temporaries: a failed realloc returns null and
+			 * leaves the old block valid, so data must not be overwritten
+			 * until the call is known to have succeeded. */
+			int newAllocated = allocated * 2;
+			char *newData = (char*) realloc( data, newAllocated );
+			if ( newData == 0 )
+				abort();
+			data = newData;
+			allocated = newAllocated;
+		}
+		data[length++] = p;
+	}
+
+	/* Forget the content but keep the storage for reuse. */
+	void clear() { length = 0; }
+
+	char *data;       /* Start of the storage. Not null terminated. */
+	int allocated;    /* Number of bytes allocated at data. */
+	int length;       /* Number of bytes in use. */
+};
+
+#endif /* _BUFFER_H */
diff --git a/common/common.cpp b/common/common.cpp
new file mode 100644
index 0000000..db23235
--- /dev/null
+++ b/common/common.cpp
@@ -0,0 +1,193 @@
+/*
+ * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "common.h"
+
+/* Integer types available as alphabet types when generating C code. Each row
+ * is { first word of the name, second word or 0, isSigned, minimum value,
+ * maximum value, size in bytes }, taken from the limits of the compiler that
+ * builds this file. */
+HostType hostTypesC[] =
+{
+	/* Whether plain char is signed is implementation-defined. Derive the
+	 * flag from CHAR_MIN so that it always agrees with the limits that
+	 * follow it on the same row. */
+	{ "char",     0,       (CHAR_MIN != 0), CHAR_MIN,  CHAR_MAX,   sizeof(char) },
+	{ "unsigned", "char",  false,           0,         UCHAR_MAX,  sizeof(unsigned char) },
+	{ "short",    0,       true,            SHRT_MIN,  SHRT_MAX,   sizeof(short) },
+	{ "unsigned", "short", false,           0,         USHRT_MAX,  sizeof(unsigned short) },
+	{ "int",      0,       true,            INT_MIN,   INT_MAX,    sizeof(int) },
+	{ "unsigned", "int",   false,           0,         UINT_MAX,   sizeof(unsigned int) },
+	{ "long",     0,       true,            LONG_MIN,  LONG_MAX,   sizeof(long) },
+	{ "unsigned", "long",  false,           0,         ULONG_MAX,  sizeof(unsigned long) }
+};
+
+/* Integer and character types available as alphabet types when generating D
+ * code. Each row is { name, 0, isSigned, minimum value, maximum value, size
+ * in bytes }. */
+HostType hostTypesD[] =
+{
+	/* byte is a signed 8-bit type, so its limits must come from signed char.
+	 * CHAR_MIN/CHAR_MAX are 0/UCHAR_MAX on hosts where plain char is
+	 * unsigned. */
+	{ "byte",    0,  true,   SCHAR_MIN, SCHAR_MAX,  1 },
+	{ "ubyte",   0,  false,  0,         UCHAR_MAX,  1 },
+	{ "char",    0,  false,  0,         UCHAR_MAX,  1 },
+	{ "short",   0,  true,   SHRT_MIN,  SHRT_MAX,   2 },
+	{ "ushort",  0,  false,  0,         USHRT_MAX,  2 },
+	{ "wchar",   0,  false,  0,         USHRT_MAX,  2 },
+	{ "int",     0,  true,   INT_MIN,   INT_MAX,    4 },
+	{ "uint",    0,  false,  0,         UINT_MAX,   4 },
+	{ "dchar",   0,  false,  0,         UINT_MAX,   4 }
+};
+
+/* Integer and character types available as alphabet types when generating
+ * Java code. Each row is { name, 0, isSigned, minimum value, maximum value,
+ * size in bytes }. */
+HostType hostTypesJava[] =
+{
+	/* byte is a signed 8-bit type, so its limits must come from signed char.
+	 * CHAR_MIN/CHAR_MAX are 0/UCHAR_MAX on hosts where plain char is
+	 * unsigned. */
+	{ "byte",   0,  true,   SCHAR_MIN, SCHAR_MAX,  1 },
+	{ "short",  0,  true,   SHRT_MIN,  SHRT_MAX,   2 },
+	{ "char",   0,  false,  0,         USHRT_MAX,  2 },
+	{ "int",    0,  true,   INT_MIN,   INT_MAX,    4 },
+};
+
+/* One descriptor per target language: { type table, number of entries in the
+ * table, default alphabet type, explicitUnsigned }. The default alphabet type
+ * is the "char" row of each table. */
+HostLang hostLangC =    { hostTypesC,    8, &hostTypesC[0],    true };
+HostLang hostLangD =    { hostTypesD,    9, &hostTypesD[2],    true };
+HostLang hostLangJava = { hostTypesJava, 4, &hostTypesJava[2], false };
+
+/* The target language in effect. Starts out as C. */
+HostLang *hostLang = &hostLangC;
+HostLangType hostLangType = CCode;
+
+/* Construct a new parameter checker for paramSpec. Scanning starts at
+ * argv[1]. No option has been examined yet, so state is noparam and the
+ * parameter/parameterArg results are cleared rather than left holding
+ * indeterminate values. The initialisers are listed in the order the members
+ * are declared in the class. */
+ParamCheck::ParamCheck(char *paramSpec, int argc, char **argv)
+:
+	parameterArg(0),
+	parameter(0),
+	state(noparam),
+	argOffset(0),
+	curArg(0),
+	iCurArg(1),
+	paramSpec(paramSpec),
+	argc(argc),
+	argv(argv)
+{
+}
+
+/* Check a single option. Returns false once the argument list is exhausted
+ * and true otherwise. On a true return, state tells the caller what was
+ * found:
+ * match: parameter holds the option character and parameterArg points at
+ * its argument, or is 0 if the option takes none.
+ * invalid: parameter holds the offending character. It is either not in
+ * paramSpec, or it requires an argument and none follows.
+ * noparam: the current argument is not an option. curArg points at it and
+ * parameter and parameterArg are 0.
+ * In paramSpec a character followed by ':' requires an argument. Options that
+ * take no argument may be grouped behind a single dash; argOffset tracks the
+ * position inside such a group between calls. */
+bool ParamCheck::check()
+{
+ bool requiresParam;
+
+ if ( iCurArg >= argc ) { /* Off the end of the arg list. */
+ state = noparam;
+ return false;
+ }
+
+ if ( argOffset != 0 && *argOffset == 0 ) {
+ /* We are at the end of an arg string. */
+ iCurArg += 1;
+ if ( iCurArg >= argc ) {
+ state = noparam;
+ return false;
+ }
+ argOffset = 0;
+ }
+
+ if ( argOffset == 0 ) {
+ /* Set the current arg. */
+ curArg = argv[iCurArg];
+
+ /* We are at the beginning of an arg string. */
+ if ( argv[iCurArg] == 0 || /* Argv[iCurArg] is null. */
+ argv[iCurArg][0] != '-' || /* Not a param. */
+ argv[iCurArg][1] == 0 ) { /* Only a dash. */
+ /* Not an option: hand it back to the caller through curArg and
+ * move on to the next arg. */
+ parameter = 0;
+ parameterArg = 0;
+
+ iCurArg += 1;
+ state = noparam;
+ return true;
+ }
+ /* Start reading option characters just past the dash. */
+ argOffset = argv[iCurArg] + 1;
+ }
+
+ /* Get the arg char. */
+ char argChar = *argOffset;
+
+ /* Loop over all the parms and look for a match. */
+ char *pSpec = paramSpec;
+ while ( *pSpec != 0 ) {
+ char pSpecChar = *pSpec;
+
+ /* If there is a ':' following the char then
+ * it requires a parm. If a parm is required
+ * then move ahead two in the parmspec. Otherwise
+ * move ahead one in the parm spec. */
+ if ( pSpec[1] == ':' ) {
+ requiresParam = true;
+ pSpec += 2;
+ }
+ else {
+ requiresParam = false;
+ pSpec += 1;
+ }
+
+ /* Do we have a match. */
+ if ( argChar == pSpecChar ) {
+ if ( requiresParam ) {
+ if ( argOffset[1] == 0 ) {
+ /* The param must follow. */
+ if ( iCurArg + 1 == argc ) {
+ /* We are the last arg so there
+ * cannot be a parameter to it. */
+ parameter = argChar;
+ parameterArg = 0;
+ iCurArg += 1;
+ argOffset = 0;
+ state = invalid;
+ return true;
+ }
+ else {
+ /* the parameter to the arg is the next arg. */
+ parameter = pSpecChar;
+ parameterArg = argv[iCurArg + 1];
+ iCurArg += 2;
+ argOffset = 0;
+ state = match;
+ return true;
+ }
+ }
+ else {
+ /* The param for the arg is built in. */
+ parameter = pSpecChar;
+ parameterArg = argOffset + 1;
+ iCurArg += 1;
+ argOffset = 0;
+ state = match;
+ return true;
+ }
+ }
+ else {
+ /* Good, we matched the parm and no
+ * arg is required. */
+ /* Stay inside this arg string: the next call looks at the
+ * character that follows. */
+ parameter = pSpecChar;
+ parameterArg = 0;
+ argOffset += 1;
+ state = match;
+ return true;
+ }
+ }
+ }
+
+ /* We did not find a match. Bad Argument. */
+ parameter = argChar;
+ parameterArg = 0;
+ argOffset += 1;
+ state = invalid;
+ return true;
+}
+
+
diff --git a/common/common.h b/common/common.h
new file mode 100644
index 0000000..077a3f6
--- /dev/null
+++ b/common/common.h
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _COMMON_H
+#define _COMMON_H
+
+#include <climits>
+
+typedef unsigned long long Size;
+
+/* A single alphabet key. The value is stored in a long; whether it is to be
+ * read as signed or unsigned is decided outside this struct (see the
+ * operators declared as friends and KeyOps). */
+struct Key
+{
+private:
+	long key;
+
+public:
+	/* Arithmetic on keys. */
+	friend inline Key operator+(const Key key1, const Key key2);
+	friend inline Key operator-(const Key key1, const Key key2);
+	friend inline Key operator/(const Key key1, const Key key2);
+	friend inline long operator&(const Key key1, const Key key2);
+
+	/* Ordering and equality of keys. */
+	friend inline bool operator<( const Key key1, const Key key2 );
+	friend inline bool operator<=( const Key key1, const Key key2 );
+	friend inline bool operator>( const Key key1, const Key key2 );
+	friend inline bool operator>=( const Key key1, const Key key2 );
+	friend inline bool operator==( const Key key1, const Key key2 );
+	friend inline bool operator!=( const Key key1, const Key key2 );
+
+	friend struct KeyOps;
+
+	/* The default constructor leaves the value unset. */
+	Key( ) {}
+	Key( const Key &key ) : key(key.key) {}
+	Key( long key ) : key(key) {}
+
+	/* The raw stored value. It must be interpreted according to the
+	 * signedness of the alphabet. */
+	long getVal() const { return key; }
+
+	/* The key as a long long. This form of the key needs no signedness
+	 * interpretation. */
+	long long getLongLong() const;
+
+	/* Character class tests. They compare the raw stored value. */
+	bool isUpper() const { return ( key >= 'A' && key <= 'Z' ); }
+	bool isLower() const { return ( key >= 'a' && key <= 'z' ); }
+	bool isPrintable() const { return ( key >= 32 && key < 127 ); }
+
+	/* Case conversion by offset. No check is made that the key is a letter
+	 * of the other case. */
+	Key toUpper() const
+		{ return Key( key - 'a' + 'A' ); }
+	Key toLower() const
+		{ return Key( key - 'A' + 'a' ); }
+
+	/* FIXME: the compound assignments below must be made aware of
+	 * isSigned. */
+	void operator+=( const Key other )
+		{ key += other.key; }
+
+	void operator-=( const Key other )
+		{ key -= other.key; }
+
+	void operator|=( const Key other )
+		{ key |= other.key; }
+
+	/* Step the key down or up by one. Needed only for ranges. */
+	inline void decrement();
+	inline void increment();
+};
+
+/* Describes one integer or character type of a target language. The tables
+ * in common.cpp hold one of these per type. */
+struct HostType
+{
+ /* First word of the type name, e.g. "unsigned" or "int". */
+ char *data1;
+ /* Second word of the type name, e.g. the "char" of "unsigned char". The
+ * tables use 0 for single-word names. */
+ char *data2;
+ /* True if the type is signed. */
+ bool isSigned;
+ /* Smallest and largest values of the type. */
+ long long minVal;
+ long long maxVal;
+ /* Size of the type in bytes. */
+ unsigned int size;
+};
+
+/* Describes a target language through the types it offers. */
+struct HostLang
+{
+ /* Table of the language's types and the number of entries in it. */
+ HostType *hostTypes;
+ int numHostTypes;
+ /* Points at one entry of hostTypes. The instances in common.cpp all point
+ * at their "char" entry. Presumably the alphabet type used when none is
+ * chosen explicitly -- TODO confirm. */
+ HostType *defaultAlphType;
+ /* NOTE(review): set to true for C and D and false for Java in common.cpp;
+ * the exact meaning is not visible here -- confirm against the users of
+ * this flag. */
+ bool explicitUnsigned;
+};
+
+
+/* Target language. One enumerator per HostLang instance declared below
+ * (hostLangC, hostLangD, hostLangJava). The hostLangType global holds the
+ * current selection. */
+enum HostLangType
+{
+ CCode,
+ DCode,
+ JavaCode
+};
+
+extern HostLang *hostLang;
+extern HostLangType hostLangType;
+
+extern HostLang hostLangC;
+extern HostLang hostLangD;
+extern HostLang hostLangJava;
+
+/* An abstraction of the key operators that manages key operations such as
+ * comparison and increment according to the signedness of the key. */
+struct KeyOps
+{
+	/* Default to a signed alphabet with no alphabet type chosen. */
+	KeyOps() :
+		isSigned(true),
+		alphType(0)
+	{}
+
+	/* Use the given signedness, with no alphabet type chosen. alphType is
+	 * set to null here so that it never holds an indeterminate pointer. */
+	KeyOps( bool isSigned ) :
+		isSigned(isSigned),
+		alphType(0)
+	{}
+
+	/* Whether keys are compared and stepped as signed values. */
+	bool isSigned;
+	/* Bounds of the alphabet. They are only given values by
+	 * setAlphType(). */
+	Key minKey, maxKey;
+	/* The alphabet type in use, or null until setAlphType() is called. */
+	HostType *alphType;
+
+	/* Select the alphabet type. Takes the signedness and the key bounds from
+	 * the type. For an unsigned type the limits pass through unsigned long
+	 * before being stored in the long held by Key. */
+	void setAlphType( HostType *alphType )
+	{
+		this->alphType = alphType;
+		isSigned = alphType->isSigned;
+		if ( isSigned ) {
+			minKey = (long) alphType->minVal;
+			maxKey = (long) alphType->maxVal;
+		}
+		else {
+			minKey = (long) (unsigned long) alphType->minVal;
+			maxKey = (long) (unsigned long) alphType->maxVal;
+		}
+	}
+
+	/* Compute the distance between two keys: the number of keys in the range
+	 * key1..key2, counting both ends. */
+	Size span( Key key1, Key key2 )
+	{
+		return isSigned ?
+			(unsigned long long)(
+				(long long)key2.key -
+				(long long)key1.key + 1) :
+			(unsigned long long)(
+				(unsigned long)key2.key) -
+				(unsigned long long)((unsigned long)key1.key) + 1;
+	}
+
+	/* Number of keys in the alphabet. */
+	Size alphSize()
+		{ return span( minKey, maxKey ); }
+
+	/* Find the first type of the current host language, in table order,
+	 * whose maximum is at least maxVal. Returns null if there is none. */
+	HostType *typeSubsumes( long long maxVal )
+	{
+		for ( int i = 0; i < hostLang->numHostTypes; i++ ) {
+			if ( maxVal <= hostLang->hostTypes[i].maxVal )
+				return hostLang->hostTypes + i;
+		}
+		return 0;
+	}
+
+	/* As above, but when isSigned is true only signed types are considered.
+	 * When isSigned is false any type is acceptable. */
+	HostType *typeSubsumes( bool isSigned, long long maxVal )
+	{
+		for ( int i = 0; i < hostLang->numHostTypes; i++ ) {
+			if ( ( !isSigned || hostLang->hostTypes[i].isSigned ) &&
+					maxVal <= hostLang->hostTypes[i].maxVal )
+				return hostLang->hostTypes + i;
+		}
+		return 0;
+	}
+};
+
+extern KeyOps *keyOps;
+
+/* Less-than on keys. Compares as signed or as unsigned according to the
+ * alphabet in use (keyOps->isSigned). */
+inline bool operator<( const Key key1, const Key key2 )
+{
+	if ( keyOps->isSigned )
+		return key1.key < key2.key;
+	return (unsigned long)key1.key < (unsigned long)key2.key;
+}
+
+/* Less-than-or-equal on keys. Compares as signed or as unsigned according to
+ * the alphabet in use (keyOps->isSigned). */
+inline bool operator<=( const Key key1, const Key key2 )
+{
+	if ( keyOps->isSigned )
+		return key1.key <= key2.key;
+	return (unsigned long)key1.key <= (unsigned long)key2.key;
+}
+
+/* Greater-than on keys. Compares as signed or as unsigned according to the
+ * alphabet in use (keyOps->isSigned). */
+inline bool operator>( const Key key1, const Key key2 )
+{
+	if ( keyOps->isSigned )
+		return key1.key > key2.key;
+	return (unsigned long)key1.key > (unsigned long)key2.key;
+}
+
+/* Greater-than-or-equal on keys. Compares as signed or as unsigned according
+ * to the alphabet in use (keyOps->isSigned). */
+inline bool operator>=( const Key key1, const Key key2 )
+{
+	if ( keyOps->isSigned )
+		return key1.key >= key2.key;
+	return (unsigned long)key1.key >= (unsigned long)key2.key;
+}
+
+/* Equality on keys. The stored values are compared directly; signedness
+ * plays no part. */
+inline bool operator==( const Key key1, const Key key2 )
+{
+	const bool same = ( key1.key == key2.key );
+	return same;
+}
+
+/* Inequality on keys. The stored values are compared directly; signedness
+ * plays no part. */
+inline bool operator!=( const Key key1, const Key key2 )
+{
+	const bool differ = ( key1.key != key2.key );
+	return differ;
+}
+
+/* Step the key down by one. Needed only for ranges. The subtraction is done
+ * on the signed or the unsigned view of the value according to
+ * keyOps->isSigned. */
+inline void Key::decrement()
+{
+	if ( keyOps->isSigned )
+		key = key - 1;
+	else
+		key = ((unsigned long)key) - 1;
+}
+
+/* Step the key up by one. Needed only for ranges. The addition is done on
+ * the signed or the unsigned view of the value according to
+ * keyOps->isSigned. */
+inline void Key::increment()
+{
+	if ( keyOps->isSigned )
+		key = key + 1;
+	else
+		key = ((unsigned long)key) + 1;
+}
+
+/* The key widened to long long. For an unsigned alphabet the value goes
+ * through unsigned long first, so that it is not sign extended. */
+inline long long Key::getLongLong() const
+{
+	if ( keyOps->isSigned )
+		return (long long)key;
+	return (long long)(unsigned long)key;
+}
+
+/* Sum of two keys, computed on the stored long values.
+ * FIXME: must be made aware of isSigned. */
+inline Key operator+(const Key key1, const Key key2)
+{
+	const long sum = key1.key + key2.key;
+	return Key( sum );
+}
+
+/* Difference of two keys, computed on the stored long values.
+ * FIXME: must be made aware of isSigned. */
+inline Key operator-(const Key key1, const Key key2)
+{
+	const long difference = key1.key - key2.key;
+	return Key( difference );
+}
+
+/* Bitwise AND of two keys. Returns the raw long rather than a Key.
+ * FIXME: must be made aware of isSigned. */
+inline long operator&(const Key key1, const Key key2)
+{
+	const long masked = key1.key & key2.key;
+	return masked;
+}
+
+/* Quotient of two keys, computed on the stored long values. No check is made
+ * for a zero divisor.
+ * FIXME: must be made aware of isSigned. */
+inline Key operator/(const Key key1, const Key key2)
+{
+	const long quotient = key1.key / key2.key;
+	return Key( quotient );
+}
+
+#endif /* _COMMON_H */
diff --git a/common/config.h.in b/common/config.h.in
new file mode 100644
index 0000000..0285bb0
--- /dev/null
+++ b/common/config.h.in
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+
+/* Compilers. */
+#undef GDC
+#undef GOBJC
+#undef CXX
+#undef CC
+#undef JAVAC
+#undef TXL
+
+#endif /* _CONFIG_H */
diff --git a/common/pcheck.h b/common/pcheck.h
new file mode 100644
index 0000000..0b836a5
--- /dev/null
+++ b/common/pcheck.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2001, 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _PCHECK_H
+#define _PCHECK_H
+
+/* Walks a command line one option at a time. Construct it with a parameter
+ * spec and the program arguments, then call check() repeatedly until it
+ * returns false. After each call the public members below describe what was
+ * found. */
+class ParamCheck
+{
+public:
+ /* paramSpec lists the accepted option characters; a character followed by
+ * ':' takes an argument. Scanning starts at argv[1]. */
+ ParamCheck(char *paramSpec, int argc, char **argv);
+
+ /* Examine the next option or argument. Returns false when the argument
+ * list is exhausted. */
+ bool check();
+
+ char *parameterArg; /* The argument to the parameter, or 0 if none. */
+ char parameter; /* The parameter matched. */
+ enum { match, invalid, noparam } state; /* Outcome of the last check(). */
+
+ char *argOffset; /* If we are reading params inside an
+ * arg this points to the offset; 0 otherwise. */
+
+ char *curArg; /* Pointer to the current arg. */
+ int iCurArg; /* Index to the current arg. */
+
+private:
+ char *paramSpec; /* Parameter spec supplied by the coder. */
+ int argc; /* Argument data from the command line. */
+ char **argv;
+
+};
+
+#endif /* _PCHECK_H */
diff --git a/configure b/configure
new file mode 100755
index 0000000..130108c
--- /dev/null
+++ b/configure
@@ -0,0 +1,3991 @@
+#! /bin/sh
+# Guess values for system-dependent variables and create Makefiles.
+# Generated by GNU Autoconf 2.59.
+#
+# Copyright (C) 2003 Free Software Foundation, Inc.
+# This configure script is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+## --------------------- ##
+## M4sh Initialization. ##
+## --------------------- ##
+
+# Be Bourne compatible
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+ emulate sh
+ NULLCMD=:
+ # Zsh 3.x and 4.x perform word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then
+ set -o posix
+fi
+DUALCASE=1; export DUALCASE # for MKS sh
+
+# Support unset when possible.
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
+ as_unset=unset
+else
+ as_unset=false
+fi
+
+
+# Work around bugs in pre-3.0 UWIN ksh.
+$as_unset ENV MAIL MAILPATH
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+for as_var in \
+ LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \
+ LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \
+ LC_TELEPHONE LC_TIME
+do
+ if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then
+ eval $as_var=C; export $as_var
+ else
+ $as_unset $as_var
+ fi
+done
+
+# Required to use basename.
+if expr a : '\(a\)' >/dev/null 2>&1; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then
+ as_basename=basename
+else
+ as_basename=false
+fi
+
+
+# Name of the executable.
+as_me=`$as_basename "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)$' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X/"$0" |
+ sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; }
+ /^X\/\(\/\/\)$/{ s//\1/; q; }
+ /^X\/\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+
+
+# PATH needs CR, and LINENO needs CR and PATH.
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+ echo "#! /bin/sh" >conf$$.sh
+ echo "exit 0" >>conf$$.sh
+ chmod +x conf$$.sh
+ if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
+ PATH_SEPARATOR=';'
+ else
+ PATH_SEPARATOR=:
+ fi
+ rm -f conf$$.sh
+fi
+
+
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x$as_lineno_3" = "x$as_lineno_2" || {
+ # Find who we are. Look in the path if we contain no path at all
+ # relative or not.
+ case $0 in
+ *[\\/]* ) as_myself=$0 ;;
+ *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
+
+ ;;
+ esac
+ # We did not find ourselves, most probably we were run as `sh COMMAND'
+ # in which case we are not to be found in the path.
+ if test "x$as_myself" = x; then
+ as_myself=$0
+ fi
+ if test ! -f "$as_myself"; then
+ { echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2
+ { (exit 1); exit 1; }; }
+ fi
+ case $CONFIG_SHELL in
+ '')
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for as_base in sh bash ksh sh5; do
+ case $as_dir in
+ /*)
+ if ("$as_dir/$as_base" -c '
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then
+ $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; }
+ $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; }
+ CONFIG_SHELL=$as_dir/$as_base
+ export CONFIG_SHELL
+ exec "$CONFIG_SHELL" "$0" ${1+"$@"}
+ fi;;
+ esac
+ done
+done
+;;
+ esac
+
+ # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
+ # uniformly replaced by the line number. The first 'sed' inserts a
+ # line-number line before each line; the second 'sed' does the real
+ # work. The second script uses 'N' to pair each line-number line
+ # with the numbered line, and appends trailing '-' during
+ # substitution so that $LINENO is not a special case at line end.
+ # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
+ # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-)
+ sed '=' <$as_myself |
+ sed '
+ N
+ s,$,-,
+ : loop
+ s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3,
+ t loop
+ s,-$,,
+ s,^['$as_cr_digits']*\n,,
+ ' >$as_me.lineno &&
+ chmod +x $as_me.lineno ||
+ { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2
+ { (exit 1); exit 1; }; }
+
+ # Don't try to exec as it changes $0, causing all sorts of problems
+ # (the dirname of $0 is not the place where we might find the
+ # original and so on. Autoconf is especially sensitive to this).
+ . ./$as_me.lineno
+ # Exit status is that of the last command.
+ exit
+}
+
+
+case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
+ *c*,-n*) ECHO_N= ECHO_C='
+' ECHO_T=' ' ;;
+ *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;;
+ *) ECHO_N= ECHO_C='\c' ECHO_T= ;;
+esac
+
+if expr a : '\(a\)' >/dev/null 2>&1; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+rm -f conf$$ conf$$.exe conf$$.file
+echo >conf$$.file
+if ln -s conf$$.file conf$$ 2>/dev/null; then
+ # We could just check for DJGPP; but this test a) works b) is more generic
+ # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04).
+ if test -f conf$$.exe; then
+ # Don't use ln at all; we don't have any links
+ as_ln_s='cp -p'
+ else
+ as_ln_s='ln -s'
+ fi
+elif ln conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s=ln
+else
+ as_ln_s='cp -p'
+fi
+rm -f conf$$ conf$$.exe conf$$.file
+
+if mkdir -p . 2>/dev/null; then
+ as_mkdir_p=:
+else
+ test -d ./-p && rmdir ./-p
+ as_mkdir_p=false
+fi
+
+as_executable_p="test -f"
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.
+as_nl='
+'
+IFS=" $as_nl"
+
+# CDPATH.
+$as_unset CDPATH
+
+
+# Name of the host.
+# hostname on some systems (SVR3.2, Linux) returns a bogus exit status,
+# so uname gets run too.
+ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
+
+exec 6>&1
+
+#
+# Initializations.
+#
+ac_default_prefix=/usr/local
+ac_config_libobj_dir=.
+cross_compiling=no
+subdirs=
+MFLAGS=
+MAKEFLAGS=
+SHELL=${CONFIG_SHELL-/bin/sh}
+
+# Maximum number of lines to put in a shell here document.
+# This variable seems obsolete. It should probably be removed, and
+# only ac_max_sed_lines should be used.
+: ${ac_max_here_lines=38}
+
+# Identity of this package.
+PACKAGE_NAME=
+PACKAGE_TARNAME=
+PACKAGE_VERSION=
+PACKAGE_STRING=
+PACKAGE_BUGREPORT=
+
+ac_unique_file="ragel/main.cpp"
+ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS BUILD_PARSERS CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CXX CXXFLAGS ac_ct_CXX SET_MAKE FLEX GPERF BISON GDC GOBJC JAVAC TXL LIBOBJS LTLIBOBJS'
+ac_subst_files=''
+
+# Initialize some variables set by options.
+ac_init_help=
+ac_init_version=false
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+cache_file=/dev/null
+exec_prefix=NONE
+no_create=
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,
+silent=
+site=
+srcdir=
+verbose=
+x_includes=NONE
+x_libraries=NONE
+
+# Installation directory options.
+# These are left unexpanded so users can "make install exec_prefix=/foo"
+# and all the variables that are supposed to be based on exec_prefix
+# by default will actually change.
+# Use braces instead of parens because sh, perl, etc. also accept them.
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datadir='${prefix}/share'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+libdir='${exec_prefix}/lib'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+infodir='${prefix}/info'
+mandir='${prefix}/man'
+
+ac_prev=
+for ac_option
+do
+ # If the previous option needs an argument, assign it.
+ if test -n "$ac_prev"; then
+ eval "$ac_prev=\$ac_option"
+ ac_prev=
+ continue
+ fi
+
+ ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'`
+
+ # Accept the important Cygnus configure options, so we can diagnose typos.
+
+ case $ac_option in
+
+ -bindir | --bindir | --bindi | --bind | --bin | --bi)
+ ac_prev=bindir ;;
+ -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+ bindir=$ac_optarg ;;
+
+ -build | --build | --buil | --bui | --bu)
+ ac_prev=build_alias ;;
+ -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+ build_alias=$ac_optarg ;;
+
+ -cache-file | --cache-file | --cache-fil | --cache-fi \
+ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+ ac_prev=cache_file ;;
+ -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+ cache_file=$ac_optarg ;;
+
+ --config-cache | -C)
+ cache_file=config.cache ;;
+
+ -datadir | --datadir | --datadi | --datad | --data | --dat | --da)
+ ac_prev=datadir ;;
+ -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \
+ | --da=*)
+ datadir=$ac_optarg ;;
+
+ -disable-* | --disable-*)
+ ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid feature name: $ac_feature" >&2
+ { (exit 1); exit 1; }; }
+ ac_feature=`echo $ac_feature | sed 's/-/_/g'`
+ eval "enable_$ac_feature=no" ;;
+
+ -enable-* | --enable-*)
+ ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid feature name: $ac_feature" >&2
+ { (exit 1); exit 1; }; }
+ ac_feature=`echo $ac_feature | sed 's/-/_/g'`
+ case $ac_option in
+ *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
+ *) ac_optarg=yes ;;
+ esac
+ eval "enable_$ac_feature='$ac_optarg'" ;;
+
+ -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+ | --exec | --exe | --ex)
+ ac_prev=exec_prefix ;;
+ -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+ | --exec=* | --exe=* | --ex=*)
+ exec_prefix=$ac_optarg ;;
+
+ -gas | --gas | --ga | --g)
+ # Obsolete; use --with-gas.
+ with_gas=yes ;;
+
+ -help | --help | --hel | --he | -h)
+ ac_init_help=long ;;
+ -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
+ ac_init_help=recursive ;;
+ -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
+ ac_init_help=short ;;
+
+ -host | --host | --hos | --ho)
+ ac_prev=host_alias ;;
+ -host=* | --host=* | --hos=* | --ho=*)
+ host_alias=$ac_optarg ;;
+
+ -includedir | --includedir | --includedi | --included | --include \
+ | --includ | --inclu | --incl | --inc)
+ ac_prev=includedir ;;
+ -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+ | --includ=* | --inclu=* | --incl=* | --inc=*)
+ includedir=$ac_optarg ;;
+
+ -infodir | --infodir | --infodi | --infod | --info | --inf)
+ ac_prev=infodir ;;
+ -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+ infodir=$ac_optarg ;;
+
+ -libdir | --libdir | --libdi | --libd)
+ ac_prev=libdir ;;
+ -libdir=* | --libdir=* | --libdi=* | --libd=*)
+ libdir=$ac_optarg ;;
+
+ -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+ | --libexe | --libex | --libe)
+ ac_prev=libexecdir ;;
+ -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+ | --libexe=* | --libex=* | --libe=*)
+ libexecdir=$ac_optarg ;;
+
+ -localstatedir | --localstatedir | --localstatedi | --localstated \
+ | --localstate | --localstat | --localsta | --localst \
+ | --locals | --local | --loca | --loc | --lo)
+ ac_prev=localstatedir ;;
+ -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+ | --localstate=* | --localstat=* | --localsta=* | --localst=* \
+ | --locals=* | --local=* | --loca=* | --loc=* | --lo=*)
+ localstatedir=$ac_optarg ;;
+
+ -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+ ac_prev=mandir ;;
+ -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+ mandir=$ac_optarg ;;
+
+ -nfp | --nfp | --nf)
+ # Obsolete; use --without-fp.
+ with_fp=no ;;
+
+ -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+ | --no-cr | --no-c | -n)
+ no_create=yes ;;
+
+ -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+ no_recursion=yes ;;
+
+ -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+ | --oldin | --oldi | --old | --ol | --o)
+ ac_prev=oldincludedir ;;
+ -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+ oldincludedir=$ac_optarg ;;
+
+ -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+ ac_prev=prefix ;;
+ -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+ prefix=$ac_optarg ;;
+
+ -program-prefix | --program-prefix | --program-prefi | --program-pref \
+ | --program-pre | --program-pr | --program-p)
+ ac_prev=program_prefix ;;
+ -program-prefix=* | --program-prefix=* | --program-prefi=* \
+ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+ program_prefix=$ac_optarg ;;
+
+ -program-suffix | --program-suffix | --program-suffi | --program-suff \
+ | --program-suf | --program-su | --program-s)
+ ac_prev=program_suffix ;;
+ -program-suffix=* | --program-suffix=* | --program-suffi=* \
+ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+ program_suffix=$ac_optarg ;;
+
+ -program-transform-name | --program-transform-name \
+ | --program-transform-nam | --program-transform-na \
+ | --program-transform-n | --program-transform- \
+ | --program-transform | --program-transfor \
+ | --program-transfo | --program-transf \
+ | --program-trans | --program-tran \
+ | --progr-tra | --program-tr | --program-t)
+ ac_prev=program_transform_name ;;
+ -program-transform-name=* | --program-transform-name=* \
+ | --program-transform-nam=* | --program-transform-na=* \
+ | --program-transform-n=* | --program-transform-=* \
+ | --program-transform=* | --program-transfor=* \
+ | --program-transfo=* | --program-transf=* \
+ | --program-trans=* | --program-tran=* \
+ | --progr-tra=* | --program-tr=* | --program-t=*)
+ program_transform_name=$ac_optarg ;;
+
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil)
+ silent=yes ;;
+
+ -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+ ac_prev=sbindir ;;
+ -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+ | --sbi=* | --sb=*)
+ sbindir=$ac_optarg ;;
+
+ -sharedstatedir | --sharedstatedir | --sharedstatedi \
+ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+ | --sharedst | --shareds | --shared | --share | --shar \
+ | --sha | --sh)
+ ac_prev=sharedstatedir ;;
+ -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+ | --sha=* | --sh=*)
+ sharedstatedir=$ac_optarg ;;
+
+ -site | --site | --sit)
+ ac_prev=site ;;
+ -site=* | --site=* | --sit=*)
+ site=$ac_optarg ;;
+
+ -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+ ac_prev=srcdir ;;
+ -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+ srcdir=$ac_optarg ;;
+
+ -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+ | --syscon | --sysco | --sysc | --sys | --sy)
+ ac_prev=sysconfdir ;;
+ -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+ sysconfdir=$ac_optarg ;;
+
+ -target | --target | --targe | --targ | --tar | --ta | --t)
+ ac_prev=target_alias ;;
+ -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+ target_alias=$ac_optarg ;;
+
+ -v | -verbose | --verbose | --verbos | --verbo | --verb)
+ verbose=yes ;;
+
+ -version | --version | --versio | --versi | --vers | -V)
+ ac_init_version=: ;;
+
+ -with-* | --with-*)
+ ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid package name: $ac_package" >&2
+ { (exit 1); exit 1; }; }
+ ac_package=`echo $ac_package| sed 's/-/_/g'`
+ case $ac_option in
+ *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
+ *) ac_optarg=yes ;;
+ esac
+ eval "with_$ac_package='$ac_optarg'" ;;
+
+ -without-* | --without-*)
+ ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid package name: $ac_package" >&2
+ { (exit 1); exit 1; }; }
+ ac_package=`echo $ac_package | sed 's/-/_/g'`
+ eval "with_$ac_package=no" ;;
+
+ --x)
+ # Obsolete; use --with-x.
+ with_x=yes ;;
+
+ -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+ | --x-incl | --x-inc | --x-in | --x-i)
+ ac_prev=x_includes ;;
+ -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+ x_includes=$ac_optarg ;;
+
+ -x-libraries | --x-libraries | --x-librarie | --x-librari \
+ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+ ac_prev=x_libraries ;;
+ -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+ x_libraries=$ac_optarg ;;
+
+ -*) { echo "$as_me: error: unrecognized option: $ac_option
+Try \`$0 --help' for more information." >&2
+ { (exit 1); exit 1; }; }
+ ;;
+
+ *=*)
+ ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid variable name: $ac_envvar" >&2
+ { (exit 1); exit 1; }; }
+ ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`
+ eval "$ac_envvar='$ac_optarg'"
+ export $ac_envvar ;;
+
+ *)
+ # FIXME: should be removed in autoconf 3.0.
+ echo "$as_me: WARNING: you should use --build, --host, --target" >&2
+ expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+ echo "$as_me: WARNING: invalid host type: $ac_option" >&2
+ : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}
+ ;;
+
+ esac
+done
+
+if test -n "$ac_prev"; then
+ ac_option=--`echo $ac_prev | sed 's/_/-/g'`
+ { echo "$as_me: error: missing argument to $ac_option" >&2
+ { (exit 1); exit 1; }; }
+fi
+
+# Be sure to have absolute paths.
+for ac_var in exec_prefix prefix
+do
+ eval ac_val=$`echo $ac_var`
+ case $ac_val in
+ [\\/$]* | ?:[\\/]* | NONE | '' ) ;;
+ *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
+ { (exit 1); exit 1; }; };;
+ esac
+done
+
+# Be sure to have absolute paths.
+for ac_var in bindir sbindir libexecdir datadir sysconfdir sharedstatedir \
+ localstatedir libdir includedir oldincludedir infodir mandir
+do
+ eval ac_val=$`echo $ac_var`
+ case $ac_val in
+ [\\/$]* | ?:[\\/]* ) ;;
+ *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
+ { (exit 1); exit 1; }; };;
+ esac
+done
+
+# There might be people who depend on the old broken behavior: `$host'
+# used to hold the argument of --host etc.
+# FIXME: To remove some day.
+build=$build_alias
+host=$host_alias
+target=$target_alias
+
+# FIXME: To remove some day.
+if test "x$host_alias" != x; then
+ if test "x$build_alias" = x; then
+ cross_compiling=maybe
+ echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host.
+ If a cross compiler is detected then cross compile mode will be used." >&2
+ elif test "x$build_alias" != "x$host_alias"; then
+ cross_compiling=yes
+ fi
+fi
+
+ac_tool_prefix=
+test -n "$host_alias" && ac_tool_prefix=$host_alias-
+
+test "$silent" = yes && exec 6>/dev/null
+
+
+# Find the source files, if location was not specified.
+if test -z "$srcdir"; then
+ ac_srcdir_defaulted=yes
+ # Try the directory containing this script, then its parent.
+ ac_confdir=`(dirname "$0") 2>/dev/null ||
+$as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$0" : 'X\(//\)[^/]' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$0" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+ srcdir=$ac_confdir
+ if test ! -r $srcdir/$ac_unique_file; then
+ srcdir=..
+ fi
+else
+ ac_srcdir_defaulted=no
+fi
+if test ! -r $srcdir/$ac_unique_file; then
+ if test "$ac_srcdir_defaulted" = yes; then
+ { echo "$as_me: error: cannot find sources ($ac_unique_file) in $ac_confdir or .." >&2
+ { (exit 1); exit 1; }; }
+ else
+ { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2
+ { (exit 1); exit 1; }; }
+ fi
+fi
+(cd $srcdir && test -r ./$ac_unique_file) 2>/dev/null ||
+ { echo "$as_me: error: sources are in $srcdir, but \`cd $srcdir' does not work" >&2
+ { (exit 1); exit 1; }; }
+srcdir=`echo "$srcdir" | sed 's%\([^\\/]\)[\\/]*$%\1%'`
+ac_env_build_alias_set=${build_alias+set}
+ac_env_build_alias_value=$build_alias
+ac_cv_env_build_alias_set=${build_alias+set}
+ac_cv_env_build_alias_value=$build_alias
+ac_env_host_alias_set=${host_alias+set}
+ac_env_host_alias_value=$host_alias
+ac_cv_env_host_alias_set=${host_alias+set}
+ac_cv_env_host_alias_value=$host_alias
+ac_env_target_alias_set=${target_alias+set}
+ac_env_target_alias_value=$target_alias
+ac_cv_env_target_alias_set=${target_alias+set}
+ac_cv_env_target_alias_value=$target_alias
+ac_env_CC_set=${CC+set}
+ac_env_CC_value=$CC
+ac_cv_env_CC_set=${CC+set}
+ac_cv_env_CC_value=$CC
+ac_env_CFLAGS_set=${CFLAGS+set}
+ac_env_CFLAGS_value=$CFLAGS
+ac_cv_env_CFLAGS_set=${CFLAGS+set}
+ac_cv_env_CFLAGS_value=$CFLAGS
+ac_env_LDFLAGS_set=${LDFLAGS+set}
+ac_env_LDFLAGS_value=$LDFLAGS
+ac_cv_env_LDFLAGS_set=${LDFLAGS+set}
+ac_cv_env_LDFLAGS_value=$LDFLAGS
+ac_env_CPPFLAGS_set=${CPPFLAGS+set}
+ac_env_CPPFLAGS_value=$CPPFLAGS
+ac_cv_env_CPPFLAGS_set=${CPPFLAGS+set}
+ac_cv_env_CPPFLAGS_value=$CPPFLAGS
+ac_env_CXX_set=${CXX+set}
+ac_env_CXX_value=$CXX
+ac_cv_env_CXX_set=${CXX+set}
+ac_cv_env_CXX_value=$CXX
+ac_env_CXXFLAGS_set=${CXXFLAGS+set}
+ac_env_CXXFLAGS_value=$CXXFLAGS
+ac_cv_env_CXXFLAGS_set=${CXXFLAGS+set}
+ac_cv_env_CXXFLAGS_value=$CXXFLAGS
+
+#
+# Report the --help message.
+#
+if test "$ac_init_help" = "long"; then
+ # Omit some internal or obsolete options to make the list less imposing.
+ # This message is too long to be a string in the A/UX 3.1 sh.
+ cat <<_ACEOF
+\`configure' configures this package to adapt to many kinds of systems.
+
+Usage: $0 [OPTION]... [VAR=VALUE]...
+
+To assign environment variables (e.g., CC, CFLAGS...), specify them as
+VAR=VALUE. See below for descriptions of some of the useful variables.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+ -h, --help display this help and exit
+ --help=short display options specific to this package
+ --help=recursive display the short help of all the included packages
+ -V, --version display version information and exit
+ -q, --quiet, --silent do not print \`checking...' messages
+ --cache-file=FILE cache test results in FILE [disabled]
+ -C, --config-cache alias for \`--cache-file=config.cache'
+ -n, --no-create do not create output files
+ --srcdir=DIR find the sources in DIR [configure dir or \`..']
+
+_ACEOF
+
+ cat <<_ACEOF
+Installation directories:
+ --prefix=PREFIX install architecture-independent files in PREFIX
+ [$ac_default_prefix]
+ --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX
+ [PREFIX]
+
+By default, \`make install' will install all the files in
+\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify
+an installation prefix other than \`$ac_default_prefix' using \`--prefix',
+for instance \`--prefix=\$HOME'.
+
+For better control, use the options below.
+
+Fine tuning of the installation directories:
+ --bindir=DIR user executables [EPREFIX/bin]
+ --sbindir=DIR system admin executables [EPREFIX/sbin]
+ --libexecdir=DIR program executables [EPREFIX/libexec]
+ --datadir=DIR read-only architecture-independent data [PREFIX/share]
+ --sysconfdir=DIR read-only single-machine data [PREFIX/etc]
+ --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
+ --localstatedir=DIR modifiable single-machine data [PREFIX/var]
+ --libdir=DIR object code libraries [EPREFIX/lib]
+ --includedir=DIR C header files [PREFIX/include]
+ --oldincludedir=DIR C header files for non-gcc [/usr/include]
+ --infodir=DIR info documentation [PREFIX/info]
+ --mandir=DIR man documentation [PREFIX/man]
+_ACEOF
+
+ cat <<\_ACEOF
+_ACEOF
+fi
+
+if test -n "$ac_init_help"; then
+
+ cat <<\_ACEOF
+
+Some influential environment variables:
+ CC C compiler command
+ CFLAGS C compiler flags
+ LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a
+ nonstandard directory <lib dir>
+ CPPFLAGS C/C++ preprocessor flags, e.g. -I<include dir> if you have
+ headers in a nonstandard directory <include dir>
+ CXX C++ compiler command
+ CXXFLAGS C++ compiler flags
+
+Use these variables to override the choices made by `configure' or to help
+it to find libraries and programs with nonstandard names/locations.
+
+_ACEOF
+fi
+
+if test "$ac_init_help" = "recursive"; then
+ # If there are subdirs, report their specific --help.
+ ac_popdir=`pwd`
+ for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
+ test -d $ac_dir || continue
+ ac_builddir=.
+
+if test "$ac_dir" != .; then
+ ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
+ # A "../" for each directory in $ac_dir_suffix.
+ ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
+else
+ ac_dir_suffix= ac_top_builddir=
+fi
+
+case $srcdir in
+ .) # No --srcdir option. We are building in place.
+ ac_srcdir=.
+ if test -z "$ac_top_builddir"; then
+ ac_top_srcdir=.
+ else
+ ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
+ fi ;;
+ [\\/]* | ?:[\\/]* ) # Absolute path.
+ ac_srcdir=$srcdir$ac_dir_suffix;
+ ac_top_srcdir=$srcdir ;;
+ *) # Relative path.
+ ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
+ ac_top_srcdir=$ac_top_builddir$srcdir ;;
+esac
+
+# Do not use `cd foo && pwd` to compute absolute paths, because
+# the directories may not exist.
+case `pwd` in
+.) ac_abs_builddir="$ac_dir";;
+*)
+ case "$ac_dir" in
+ .) ac_abs_builddir=`pwd`;;
+ [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";;
+ *) ac_abs_builddir=`pwd`/"$ac_dir";;
+ esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_top_builddir=${ac_top_builddir}.;;
+*)
+ case ${ac_top_builddir}. in
+ .) ac_abs_top_builddir=$ac_abs_builddir;;
+ [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;;
+ *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;;
+ esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_srcdir=$ac_srcdir;;
+*)
+ case $ac_srcdir in
+ .) ac_abs_srcdir=$ac_abs_builddir;;
+ [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;;
+ *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;;
+ esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_top_srcdir=$ac_top_srcdir;;
+*)
+ case $ac_top_srcdir in
+ .) ac_abs_top_srcdir=$ac_abs_builddir;;
+ [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;;
+ *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;;
+ esac;;
+esac
+
+ cd $ac_dir
+ # Check for guested configure; otherwise get Cygnus style configure.
+ if test -f $ac_srcdir/configure.gnu; then
+ echo
+ $SHELL $ac_srcdir/configure.gnu --help=recursive
+ elif test -f $ac_srcdir/configure; then
+ echo
+ $SHELL $ac_srcdir/configure --help=recursive
+ elif test -f $ac_srcdir/configure.ac ||
+ test -f $ac_srcdir/configure.in; then
+ echo
+ $ac_configure --help
+ else
+ echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
+ fi
+ cd "$ac_popdir"
+ done
+fi
+
+test -n "$ac_init_help" && exit 0
+if $ac_init_version; then
+ cat <<\_ACEOF
+
+Copyright (C) 2003 Free Software Foundation, Inc.
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+_ACEOF
+ exit 0
+fi
+exec 5>config.log
+cat >&5 <<_ACEOF
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+
+It was created by $as_me, which was
+generated by GNU Autoconf 2.59. Invocation command line was
+
+ $ $0 $@
+
+_ACEOF
+{
+cat <<_ASUNAME
+## --------- ##
+## Platform. ##
+## --------- ##
+
+hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
+/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown`
+
+/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown`
+/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
+hostinfo = `(hostinfo) 2>/dev/null || echo unknown`
+/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown`
+/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown`
+/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown`
+
+_ASUNAME
+
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ echo "PATH: $as_dir"
+done
+
+} >&5
+
+cat >&5 <<_ACEOF
+
+
+## ----------- ##
+## Core tests. ##
+## ----------- ##
+
+_ACEOF
+
+
+# Keep a trace of the command line.
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Strip out --silent because we don't want to record it for future runs.
+# Also quote any args containing shell meta-characters.
+# Make two passes to allow for proper duplicate-argument suppression.
+#
+# Pass 1 only collects every surviving argument into ac_configure_args0.
+# Pass 2 rebuilds the same list incrementally in ac_configure_args1 and
+# appends each argument to ac_configure_args unless it is repeated later.
+ac_configure_args=
+ac_configure_args0=
+ac_configure_args1=
+ac_sep=
+ac_must_keep_next=false
+for ac_pass in 1 2
+do
+ # "for ac_arg" without a list iterates over the positional parameters.
+ for ac_arg
+ do
+ case $ac_arg in
+ -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil)
+ continue ;;
+ *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
+ # Escape embedded single quotes so the argument can later be wrapped
+ # in single quotes.
+ ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+ esac
+ case $ac_pass in
+ 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;;
+ 2)
+ ac_configure_args1="$ac_configure_args1 '$ac_arg'"
+ if test $ac_must_keep_next = true; then
+ ac_must_keep_next=false # Got value, back to normal.
+ else
+ case $ac_arg in
+ *=* | --config-cache | -C | -disable-* | --disable-* \
+ | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
+ | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
+ | -with-* | --with-* | -without-* | --without-* | --x)
+ # Self-contained option: skip this occurrence when the complete list
+ # from pass 1 still contains the same argument after the prefix
+ # processed so far, i.e. only the last occurrence is kept.
+ case "$ac_configure_args0 " in
+ "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;
+ esac
+ ;;
+ # Any other dash option: the next argument is kept unconditionally
+ # (the flag is cleared above with the note "Got value").
+ -* ) ac_must_keep_next=true ;;
+ esac
+ fi
+ ac_configure_args="$ac_configure_args$ac_sep'$ac_arg'"
+ # Get rid of the leading space.
+ ac_sep=" "
+ ;;
+ esac
+ done
+done
+# Discard the two scratch lists: run $as_unset on each; if that fails and the
+# variable is set, blank it and export the empty value instead.
+$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; }
+$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; }
+
+# When interrupted or exit'd, cleanup temporary files, and complete
+# config.log. We remove comments because anyway the quotes in there
+# would cause problems or look ugly.
+# WARNING: Be sure not to use single quotes in there, as some shells,
+# such as our DU 5.0 friend, will then `close' the trap.
+#
+# The handler below is installed for condition 0 (shell exit).  It saves the
+# exit status, writes to descriptor 5 the cache variables, the output
+# variables, the output files (if any) and a sorted confdefs.h, notes the
+# signal and exit status, removes temporary files and finally exits with
+# the saved status.
+trap 'exit_status=$?
+ # Save into config.log some information that might help in debugging.
+ {
+ echo
+
+ cat <<\_ASBOX
+## ---------------- ##
+## Cache variables. ##
+## ---------------- ##
+_ASBOX
+ echo
+ # The following way of writing the cache mishandles newlines in values,
+{
+ (set) 2>&1 |
+ case `(ac_space='"'"' '"'"'; set | grep ac_space) 2>&1` in
+ *ac_space=\ *)
+ sed -n \
+ "s/'"'"'/'"'"'\\\\'"'"''"'"'/g;
+ s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='"'"'\\2'"'"'/p"
+ ;;
+ *)
+ sed -n \
+ "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p"
+ ;;
+ esac;
+}
+ echo
+
+ cat <<\_ASBOX
+## ----------------- ##
+## Output variables. ##
+## ----------------- ##
+_ASBOX
+ echo
+ for ac_var in $ac_subst_vars
+ do
+ eval ac_val=$`echo $ac_var`
+ echo "$ac_var='"'"'$ac_val'"'"'"
+ done | sort
+ echo
+
+ if test -n "$ac_subst_files"; then
+ cat <<\_ASBOX
+## ------------- ##
+## Output files. ##
+## ------------- ##
+_ASBOX
+ echo
+ for ac_var in $ac_subst_files
+ do
+ eval ac_val=$`echo $ac_var`
+ echo "$ac_var='"'"'$ac_val'"'"'"
+ done | sort
+ echo
+ fi
+
+ if test -s confdefs.h; then
+ cat <<\_ASBOX
+## ----------- ##
+## confdefs.h. ##
+## ----------- ##
+_ASBOX
+ echo
+ sed "/^$/d" confdefs.h | sort
+ echo
+ fi
+ test "$ac_signal" != 0 &&
+ echo "$as_me: caught signal $ac_signal"
+ echo "$as_me: exit $exit_status"
+ } >&5
+ rm -f core *.core &&
+ rm -rf conftest* confdefs* conf$$* $ac_clean_files &&
+ exit $exit_status
+ ' 0
+# For signals 1, 2, 13 and 15 (conventionally HUP, INT, PIPE and TERM) record
+# the signal number in ac_signal and exit with status 1; exiting runs the
+# exit handler installed above, which reports the signal.
+for ac_signal in 1 2 13 15; do
+ trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal
+done
+# No signal has been caught so far.
+ac_signal=0
+
+# Start confdefs.h from scratch.  Definitions are accumulated in this file
+# because DEFS can exceed OS command line length limits.
+rm -rf conftest* confdefs.h
+# Seed the file with a single newline: AIX cpp loses on an empty file.
+echo >confdefs.h
+
+# Predefined preprocessor variables describing the package, appended in
+# the same order as before but through a single here-document.
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_NAME "$PACKAGE_NAME"
+#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
+#define PACKAGE_VERSION "$PACKAGE_VERSION"
+#define PACKAGE_STRING "$PACKAGE_STRING"
+#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
+_ACEOF
+
+
+# Let the site file select an alternate cache file if it wants to.
+# Prefer explicitly selected file to automatically selected ones.
+if test -z "$CONFIG_SITE"; then
+ if test "x$prefix" != xNONE; then
+ CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site"
+ else
+ CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site"
+ fi
+fi
+for ac_site_file in $CONFIG_SITE; do
+ if test -r "$ac_site_file"; then
+ { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5
+echo "$as_me: loading site script $ac_site_file" >&6;}
+ sed 's/^/| /' "$ac_site_file" >&5
+ . "$ac_site_file"
+ fi
+done
+
+# Load the results cached by a previous run, or create an empty cache file.
+# The file name is quoted wherever it is used so that a cache path that
+# contains whitespace or glob characters is handled as a single word.
+if test -r "$cache_file"; then
+  # Some versions of bash will fail to source /dev/null (special
+  # files actually), so we avoid doing that.
+  if test -f "$cache_file"; then
+    { echo "$as_me:$LINENO: loading cache $cache_file" >&5
+echo "$as_me: loading cache $cache_file" >&6;}
+    # Absolute names (leading slash or backslash, optionally after a drive
+    # letter) are sourced as given; any other name is prefixed with ./
+    # (presumably so that the dot command does not search PATH for it).
+    case $cache_file in
+      [\\/]* | ?:[\\/]* ) . "$cache_file";;
+      *)                  . "./$cache_file";;
+    esac
+  fi
+else
+  { echo "$as_me:$LINENO: creating cache $cache_file" >&5
+echo "$as_me: creating cache $cache_file" >&6;}
+  # Create (or truncate) the cache file without writing anything to it.
+  >"$cache_file"
+fi
+
+# Check that the precious variables saved in the cache have kept the same
+# value.
+# The variable names are recovered from the shell variables of the form
+# ac_env_NAME_set.  For each NAME the cached state (ac_cv_env_NAME_set /
+# ac_cv_env_NAME_value) is compared with the current one
+# (ac_env_NAME_set / ac_env_NAME_value).
+ac_cache_corrupted=false
+for ac_var in `(set) 2>&1 |
+ sed -n 's/^ac_env_\([a-zA-Z_0-9]*\)_set=.*/\1/p'`; do
+ eval ac_old_set=\$ac_cv_env_${ac_var}_set
+ eval ac_new_set=\$ac_env_${ac_var}_set
+ eval ac_old_val="\$ac_cv_env_${ac_var}_value"
+ eval ac_new_val="\$ac_env_${ac_var}_value"
+ case $ac_old_set,$ac_new_set in
+ # Set in the previous run but not now.
+ set,)
+ { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
+echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
+ ac_cache_corrupted=: ;;
+ # Set now but not in the previous run.
+ ,set)
+ { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5
+echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
+ ac_cache_corrupted=: ;;
+ # Unset in both runs: nothing to compare.
+ ,);;
+ # Set in both runs: the values must be identical.
+ *)
+ if test "x$ac_old_val" != "x$ac_new_val"; then
+ { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5
+echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
+ { echo "$as_me:$LINENO: former value: $ac_old_val" >&5
+echo "$as_me: former value: $ac_old_val" >&2;}
+ { echo "$as_me:$LINENO: current value: $ac_new_val" >&5
+echo "$as_me: current value: $ac_new_val" >&2;}
+ ac_cache_corrupted=:
+ fi;;
+ esac
+ # Pass precious variables to config.status.
+ # A currently set variable is appended to ac_configure_args as NAME=value,
+ # with embedded single quotes escaped when the value contains shell
+ # meta-characters.
+ if test "$ac_new_set" = set; then
+ case $ac_new_val in
+ *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
+ ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
+ *) ac_arg=$ac_var=$ac_new_val ;;
+ esac
+ case " $ac_configure_args " in
+ *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy.
+ *) ac_configure_args="$ac_configure_args '$ac_arg'" ;;
+ esac
+ fi
+done
+# ac_cache_corrupted holds either "false" or ":" and is run as a command, so
+# any mismatch recorded above ends the script here with status 1.
+if $ac_cache_corrupted; then
+ { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5
+echo "$as_me: error: changes in the environment can compromise the build" >&2;}
+ { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5
+echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ac_config_headers="$ac_config_headers common/config.h"
+
+
+BUILD_PARSERS=true
+
+
+# Look for a C compiler.  The language settings are (re)established first.
+# First candidate: gcc, tried as ${ac_tool_prefix}gcc when a tool prefix is
+# set and then as plain gcc.  A cached ac_cv_prog_CC or a CC that is already
+# set takes precedence over searching PATH.
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}gcc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+# Walk PATH (an empty entry means ".") and stop at the first directory that
+# holds an executable with the wanted name, with or without one of the
+# extensions in ac_executable_extensions.
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="${ac_tool_prefix}gcc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+fi
+# Nothing was selected through the tool prefix: look for plain gcc.  This
+# search keeps its own result in ac_ct_CC / ac_cv_prog_ac_ct_CC.
+if test -z "$ac_cv_prog_CC"; then
+ ac_ct_CC=$CC
+ # Extract the first word of "gcc", so it can be a program name with args.
+set dummy gcc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CC="gcc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
+echo "${ECHO_T}$ac_ct_CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+ CC=$ac_ct_CC
+else
+ CC="$ac_cv_prog_CC"
+fi
+
+# Second candidate, tried only while CC is still empty: cc, first as
+# ${ac_tool_prefix}cc when a tool prefix is set, then as plain cc.  The
+# search follows the same PATH walk as the gcc lookup.
+if test -z "$CC"; then
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}cc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="${ac_tool_prefix}cc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+fi
+# Fall back to the unprefixed name when the prefixed lookup set nothing.
+if test -z "$ac_cv_prog_CC"; then
+ ac_ct_CC=$CC
+ # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CC="cc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
+echo "${ECHO_T}$ac_ct_CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+ CC=$ac_ct_CC
+else
+ CC="$ac_cv_prog_CC"
+fi
+
+fi
+# Third candidate, tried only while CC is still empty: plain cc, but never
+# /usr/ucb/cc.  When that file is met during the PATH walk it is skipped and
+# remembered in ac_prog_rejected; a cc found afterwards is then recorded by
+# its full file name so that the rejected one cannot be picked up instead.
+if test -z "$CC"; then
+ # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+ ac_prog_rejected=no
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
+ ac_prog_rejected=yes
+ continue
+ fi
+ ac_cv_prog_CC="cc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+if test $ac_prog_rejected = yes; then
+ # We found a bogon in the path, so make sure we never use it.
+ set dummy $ac_cv_prog_CC
+ shift
+ # $# is non-zero only if the search above also selected a compiler.
+ if test $# != 0; then
+ # We chose a different compiler from the bogus one.
+ # However, it has the same basename, so the bogon will be chosen
+ # first if we set CC to just the basename; use the full file name.
+ shift
+ ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@"
+ fi
+fi
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+fi
+# Last candidate, tried only while CC is still empty: cl, first with the tool
+# prefix (when one is set) and then without it.  Both lookups are written as
+# loops over a program list that currently holds the single name cl.
+if test -z "$CC"; then
+ if test -n "$ac_tool_prefix"; then
+ for ac_prog in cl
+ do
+ # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+ # Stop at the first program of the list that was found.
+ test -n "$CC" && break
+ done
+fi
+# Same lookup without the tool prefix, tracked in ac_ct_CC.
+if test -z "$CC"; then
+ ac_ct_CC=$CC
+ for ac_prog in cl
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CC="$ac_prog"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
+echo "${ECHO_T}$ac_ct_CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+ test -n "$ac_ct_CC" && break
+done
+
+ CC=$ac_ct_CC
+fi
+
+fi
+
+
+# Every candidate has been tried.  An empty CC is fatal: report the problem
+# on descriptors 5 and 2 and exit with status 1.
+test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&5
+echo "$as_me: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+
+# Provide some information about the compiler.
+# The compiler is the first word of the compile command template.  It is run
+# with --version, -v and -V in turn, with stdin from /dev/null and all output
+# sent to descriptor 5.  The exit statuses are recorded there too and are
+# not acted upon.
+echo "$as_me:$LINENO:" \
+ "checking for C compiler version" >&5
+ac_compiler=`set X $ac_compile; echo $2`
+{ (eval echo "$as_me:$LINENO: \"$ac_compiler --version </dev/null >&5\"") >&5
+ (eval $ac_compiler --version </dev/null >&5) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (eval echo "$as_me:$LINENO: \"$ac_compiler -v </dev/null >&5\"") >&5
+ (eval $ac_compiler -v </dev/null >&5) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (eval echo "$as_me:$LINENO: \"$ac_compiler -V </dev/null >&5\"") >&5
+ (eval $ac_compiler -V </dev/null >&5) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files a.out a.exe b.out"
+# Try to create an executable without -o first, disregard a.out.
+# It will help us diagnose broken compilers, and finding out an intuition
+# of exeext.
+echo "$as_me:$LINENO: checking for C compiler default output file name" >&5
+echo $ECHO_N "checking for C compiler default output file name... $ECHO_C" >&6
+ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
+if { (eval echo "$as_me:$LINENO: \"$ac_link_default\"") >&5
+ (eval $ac_link_default) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ # Find the output, starting from the most likely. This scheme is
+# not robust to junk in `.', hence go to wildcards (a.*) only as a last
+# resort.
+
+# Be careful to initialize this variable, since it used to be cached.
+# Otherwise an old cache value of `no' led to `EXEEXT = no' in a Makefile.
+ac_cv_exeext=
+# b.out is created by i960 compilers.
+for ac_file in a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out
+do
+ test -f "$ac_file" || continue
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj )
+ ;;
+ conftest.$ac_ext )
+ # This is the source file.
+ ;;
+ [ab].out )
+ # We found the default executable, but exeext='' is most
+ # certainly right.
+ break;;
+ *.* )
+ ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+ # FIXME: I believe we export ac_cv_exeext for Libtool,
+ # but it would be cool to find out if it's true. Does anybody
+ # maintain Libtool? --akim.
+ export ac_cv_exeext
+ break;;
+ * )
+ break;;
+ esac
+done
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { echo "$as_me:$LINENO: error: C compiler cannot create executables
+See \`config.log' for more details." >&5
+echo "$as_me: error: C compiler cannot create executables
+See \`config.log' for more details." >&2;}
+ { (exit 77); exit 77; }; }
+fi
+
+ac_exeext=$ac_cv_exeext
+echo "$as_me:$LINENO: result: $ac_file" >&5
+echo "${ECHO_T}$ac_file" >&6
+
+# Check the compiler produces executables we can run. If not, either
+# the compiler is broken, or we cross compile.
+echo "$as_me:$LINENO: checking whether the C compiler works" >&5
+echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6
+# FIXME: These cross compiler hacks should be removed for Autoconf 3.0
+# If not cross compiling, check that we can run a simple program.
+if test "$cross_compiling" != yes; then
+ if { ac_try='./$ac_file'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ # The freshly linked program ran, so this is a native build.
+ cross_compiling=no
+ else
+ # It did not run.  With cross_compiling=maybe that settles the question in
+ # favour of a cross build; with any other value it is a fatal error.
+ if test "$cross_compiling" = maybe; then
+ cross_compiling=yes
+ else
+ { { echo "$as_me:$LINENO: error: cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details." >&5
+echo "$as_me: error: cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+ fi
+ fi
+fi
+echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+
+# Remove the link products and restore the saved list of files to clean up.
+rm -f a.out a.exe conftest$ac_cv_exeext b.out
+ac_clean_files=$ac_clean_files_save
+# Report the value of cross_compiling that the run test above settled on;
+# nothing new is tested here.
+echo "$as_me:$LINENO: checking whether we are cross compiling" >&5
+echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6
+echo "$as_me:$LINENO: result: $cross_compiling" >&5
+echo "${ECHO_T}$cross_compiling" >&6
+
+# Determine the suffix of executables: link the test program with
+# "-o conftest$ac_exeext" and inspect which conftest file appears.  A link
+# failure is fatal.
+echo "$as_me:$LINENO: checking for suffix of executables" >&5
+echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ # If both `conftest.exe' and `conftest' are `present' (well, observable)
+# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will
+# work properly (i.e., refer to `conftest.exe'), while it won't with
+# `rm'.
+for ac_file in conftest.exe conftest conftest.*; do
+ test -f "$ac_file" || continue
+ case $ac_file in
+ # Sources, objects and other by-products are skipped.
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) ;;
+ # A name with a dot: the suffix is everything from the first dot on.
+ *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+ export ac_cv_exeext
+ break;;
+ # A name without a dot: ac_cv_exeext is left as it was.
+ * ) break;;
+ esac
+done
+else
+ { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details." >&5
+echo "$as_me: error: cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+rm -f conftest$ac_cv_exeext
+echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5
+echo "${ECHO_T}$ac_cv_exeext" >&6
+
+# Drop the test source and publish the result as EXEEXT / ac_exeext.
+rm -f conftest.$ac_ext
+EXEEXT=$ac_cv_exeext
+ac_exeext=$EXEEXT
+# Determine the suffix of object files (without the dot), unless a value is
+# already present in ac_cv_objext: compile the minimal test program and look
+# at the conftest.* file that the compiler produced.  A compile failure is
+# fatal.
+echo "$as_me:$LINENO: checking for suffix of object files" >&5
+echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6
+if test "${ac_cv_objext+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.o conftest.obj
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ # The two usual names are listed first, then any other conftest.* file.
+ for ac_file in `(ls conftest.o conftest.obj; ls conftest.*) 2>/dev/null`; do
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg ) ;;
+ # The suffix is whatever follows the last dot of the file name.
+ *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`
+ break;;
+ esac
+done
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile
+See \`config.log' for more details." >&5
+echo "$as_me: error: cannot compute suffix of object files: cannot compile
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+rm -f conftest.$ac_cv_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $ac_cv_objext" >&5
+echo "${ECHO_T}$ac_cv_objext" >&6
+OBJEXT=$ac_cv_objext
+ac_objext=$OBJEXT
+# Decide whether the compiler is GNU C, unless the answer is cached: the
+# test program contains a deliberate syntax error that is compiled only
+# when __GNUC__ is not defined.  The compile counts as successful when the
+# compiler exits with status 0, its filtered stderr is empty (or
+# ac_c_werror_flag is empty) and a non-empty object file exists.
+echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5
+echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6
+if test "${ac_cv_c_compiler_gnu+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+#ifndef __GNUC__
+ choke me
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_compiler_gnu=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ac_compiler_gnu=no
+fi
+rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_c_compiler_gnu=$ac_compiler_gnu
+
+fi
+echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5
+echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6
+# GCC is "yes" for a GNU compiler and empty otherwise.
+GCC=`test $ac_compiler_gnu = yes && echo yes`
+# Check whether the compiler accepts -g.  Whether CFLAGS was set at all
+# (ac_test_CFLAGS) and its value (ac_save_CFLAGS) are remembered first,
+# because CFLAGS is overwritten with "-g" for the duration of the test.
+ac_test_CFLAGS=${CFLAGS+set}
+ac_save_CFLAGS=$CFLAGS
+CFLAGS="-g"
+echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5
+echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6
+if test "${ac_cv_prog_cc_g+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+# Success requires exit status 0, an empty filtered stderr (or an empty
+# ac_c_werror_flag) and a non-empty object file.
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_prog_cc_g=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ac_cv_prog_cc_g=no
+fi
+rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5
+echo "${ECHO_T}$ac_cv_prog_cc_g" >&6
+# Settle the final CFLAGS.  A value that was set before the -g test is
+# restored untouched.  Otherwise the default depends on two facts: whether
+# -g is accepted and whether the compiler is GNU C.
+if test "$ac_test_CFLAGS" = set; then
+  CFLAGS=$ac_save_CFLAGS
+else
+  case $ac_cv_prog_cc_g,$GCC in
+    yes,yes) CFLAGS="-g -O2" ;;
+    yes,*)   CFLAGS="-g" ;;
+    *,yes)   CFLAGS="-O2" ;;
+    *)       CFLAGS= ;;
+  esac
+fi
+# Find the option, if any, that makes the compiler accept ANSI C.  One test
+# program using prototypes, an old-style definition, stdarg and a hex
+# character constant is compiled with each candidate option in turn; the
+# first option that compiles it is kept in ac_cv_prog_cc_stdc.  The empty
+# option comes first, so a compiler that needs nothing gets an empty value.
+echo "$as_me:$LINENO: checking for $CC option to accept ANSI C" >&5
+echo $ECHO_N "checking for $CC option to accept ANSI C... $ECHO_C" >&6
+if test "${ac_cv_prog_cc_stdc+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_cv_prog_cc_stdc=no
+ac_save_CC=$CC
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdarg.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+ char **p;
+ int i;
+{
+ return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+ char *s;
+ va_list v;
+ va_start (v,p);
+ s = g (p, va_arg (v,int));
+ va_end (v);
+ return s;
+}
+
+/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has
+ function prototypes and stuff, but not '\xHH' hex character constants.
+ These don't provoke an error unfortunately, instead are silently treated
+ as 'x'. The following induces an error, until -std1 is added to get
+ proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an
+ array size at least. It's necessary to write '\x00'==0 to get something
+ that's true only with -std1. */
+int osf4_cc_array ['\x00' == 0 ? 1 : -1];
+
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+int
+main ()
+{
+return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1];
+ ;
+ return 0;
+}
+_ACEOF
+# Don't try gcc -ansi; that turns off useful extensions and
+# breaks some systems' header files.
+# AIX -qlanglvl=ansi
+# Ultrix and OSF/1 -std1
+# HP-UX 10.20 and later -Ae
+# HP-UX older versions -Aa -D_HPUX_SOURCE
+# SVR4 -Xc -D__EXTENSIONS__
+for ac_arg in "" -qlanglvl=ansi -std1 -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+ # The candidate option is passed by temporarily appending it to CC.
+ CC="$ac_save_CC $ac_arg"
+ rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_prog_cc_stdc=$ac_arg
+break
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+fi
+rm -f conftest.err conftest.$ac_objext
+done
+rm -f conftest.$ac_ext conftest.$ac_objext
+CC=$ac_save_CC
+
+fi
+
+# An empty value (no option needed) and "no" (no option worked) are both
+# reported as "none needed"; any other value is appended to CC for good.
+case "x$ac_cv_prog_cc_stdc" in
+ x|xno)
+ echo "$as_me:$LINENO: result: none needed" >&5
+echo "${ECHO_T}none needed" >&6 ;;
+ *)
+ echo "$as_me:$LINENO: result: $ac_cv_prog_cc_stdc" >&5
+echo "${ECHO_T}$ac_cv_prog_cc_stdc" >&6
+ CC="$CC $ac_cv_prog_cc_stdc" ;;
+esac
+
+# Some people use a C++ compiler to compile C. Since we use `exit',
+# in C++ we need to declare it. In case someone uses the same compiler
+# for both compiling C and C++ we need to have the C++ compiler decide
+# the declaration of exit, since it's the most demanding environment.
+#
+# The first program compiles only when __cplusplus is defined.  If it does,
+# each candidate declaration is tried twice: once together with <stdlib.h>
+# and once on its own.  The first declaration that compiles both ways is
+# kept in ac_declaration.
+cat >conftest.$ac_ext <<_ACEOF
+#ifndef __cplusplus
+ choke me
+#endif
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ for ac_declaration in \
+ '' \
+ 'extern "C" void std::exit (int) throw (); using std::exit;' \
+ 'extern "C" void std::exit (int); using std::exit;' \
+ 'extern "C" void exit (int) throw ();' \
+ 'extern "C" void exit (int);' \
+ 'void exit (int);'
+do
+ # Step 1: the declaration must not conflict with <stdlib.h>.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_declaration
+#include <stdlib.h>
+int
+main ()
+{
+exit (42);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ :
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+# Conflict with the header: move on to the next candidate.
+continue
+fi
+rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
+ # Step 2: the declaration alone must be enough to call exit.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_declaration
+int
+main ()
+{
+exit (42);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ break
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+fi
+rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
+done
+rm -f conftest*
+# A non-empty declaration is appended to confdefs.h, guarded so that it is
+# only seen when compiling as C++.
+if test -n "$ac_declaration"; then
+ echo '#ifdef __cplusplus' >>confdefs.h
+ echo $ac_declaration >>confdefs.h
+ echo '#endif' >>confdefs.h
+fi
+
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+fi
+rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
+# Re-establish the C language settings for whatever follows.
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+cat >>confdefs.h <<_ACEOF
+#define CC $CC
+_ACEOF
+
+
+ac_ext=cc
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+if test -n "$ac_tool_prefix"; then # first pass: only when a tool prefix is set
+ for ac_prog in $CCC g++ c++ gpp aCC CC cxx cc++ cl FCC KCC RCC xlC_r xlC
+ do
+ # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2 # ac_word = prefixed candidate name
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_CXX+set}" = set; then # a cached answer skips the PATH scan
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CXX"; then
+ ac_cv_prog_CXX="$CXX" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR # split $PATH on the path separator
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS # restore IFS inside the loop body
+ test -z "$as_dir" && as_dir=. # an empty PATH entry means the current directory
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CXX="$ac_tool_prefix$ac_prog"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2 # leave both the extension loop and the PATH loop
+ fi
+done
+done
+
+fi
+fi
+CXX=$ac_cv_prog_CXX
+if test -n "$CXX"; then
+ echo "$as_me:$LINENO: result: $CXX" >&5
+echo "${ECHO_T}$CXX" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+ test -n "$CXX" && break # the first candidate that resolves wins
+ done
+fi
+if test -z "$CXX"; then # second pass: same candidates without the tool prefix
+ ac_ct_CXX=$CXX
+ for ac_prog in $CCC g++ c++ gpp aCC CC cxx cc++ cl FCC KCC RCC xlC_r xlC
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_ac_ct_CXX+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_CXX"; then
+ ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CXX="$ac_prog"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+ac_ct_CXX=$ac_cv_prog_ac_ct_CXX
+if test -n "$ac_ct_CXX"; then
+ echo "$as_me:$LINENO: result: $ac_ct_CXX" >&5
+echo "${ECHO_T}$ac_ct_CXX" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+ test -n "$ac_ct_CXX" && break
+done
+test -n "$ac_ct_CXX" || ac_ct_CXX="g++" # fall back to g++ when no candidate was found
+
+ CXX=$ac_ct_CXX
+fi
+
+
+# Provide some information about the compiler.
+echo "$as_me:$LINENO:" \
+ "checking for C++ compiler version" >&5
+ac_compiler=`set X $ac_compile; echo $2` # first word of $ac_compile, i.e. the compiler command
+{ (eval echo "$as_me:$LINENO: \"$ac_compiler --version </dev/null >&5\"") >&5 # record the command about to run
+ (eval $ac_compiler --version </dev/null >&5) 2>&5 # run it; stdout and stderr both go to fd 5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } # the group's status is the probe's status; nothing here tests it
+{ (eval echo "$as_me:$LINENO: \"$ac_compiler -v </dev/null >&5\"") >&5 # same probe with -v
+ (eval $ac_compiler -v </dev/null >&5) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (eval echo "$as_me:$LINENO: \"$ac_compiler -V </dev/null >&5\"") >&5 # same probe with -V
+ (eval $ac_compiler -V </dev/null >&5) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+
+echo "$as_me:$LINENO: checking whether we are using the GNU C++ compiler" >&5
+echo $ECHO_N "checking whether we are using the GNU C++ compiler... $ECHO_C" >&6
+if test "${ac_cv_cxx_compiler_gnu+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+#ifndef __GNUC__
+ choke me
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_compiler_gnu=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ac_compiler_gnu=no
+fi
+rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_cxx_compiler_gnu=$ac_compiler_gnu
+
+fi
+echo "$as_me:$LINENO: result: $ac_cv_cxx_compiler_gnu" >&5
+echo "${ECHO_T}$ac_cv_cxx_compiler_gnu" >&6
+GXX=`test $ac_compiler_gnu = yes && echo yes`
+ac_test_CXXFLAGS=${CXXFLAGS+set}
+ac_save_CXXFLAGS=$CXXFLAGS
+CXXFLAGS="-g"
+echo "$as_me:$LINENO: checking whether $CXX accepts -g" >&5
+echo $ECHO_N "checking whether $CXX accepts -g... $ECHO_C" >&6
+if test "${ac_cv_prog_cxx_g+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_prog_cxx_g=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ac_cv_prog_cxx_g=no
+fi
+rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $ac_cv_prog_cxx_g" >&5
+echo "${ECHO_T}$ac_cv_prog_cxx_g" >&6
+if test "$ac_test_CXXFLAGS" = set; then
+ CXXFLAGS=$ac_save_CXXFLAGS
+elif test $ac_cv_prog_cxx_g = yes; then
+ if test "$GXX" = yes; then
+ CXXFLAGS="-g -O2"
+ else
+ CXXFLAGS="-g"
+ fi
+else
+ if test "$GXX" = yes; then
+ CXXFLAGS="-O2"
+ else
+ CXXFLAGS=
+ fi
+fi
+for ac_declaration in \
+ '' \
+ 'extern "C" void std::exit (int) throw (); using std::exit;' \
+ 'extern "C" void std::exit (int); using std::exit;' \
+ 'extern "C" void exit (int) throw ();' \
+ 'extern "C" void exit (int);' \
+ 'void exit (int);'
+do
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_declaration
+#include <stdlib.h>
+int
+main ()
+{
+exit (42);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ :
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+continue
+fi
+rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_declaration
+int
+main ()
+{
+exit (42);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ break
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+fi
+rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
+done
+rm -f conftest*
+if test -n "$ac_declaration"; then
+ echo '#ifdef __cplusplus' >>confdefs.h
+ echo $ac_declaration >>confdefs.h
+ echo '#endif' >>confdefs.h
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+cat >>confdefs.h <<_ACEOF
+#define CXX $CXX
+_ACEOF
+
+
+ac_ext=cc
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+
+echo "$as_me:$LINENO: checking whether ${MAKE-make} sets \$(MAKE)" >&5
+echo $ECHO_N "checking whether ${MAKE-make} sets \$(MAKE)... $ECHO_C" >&6
+set dummy ${MAKE-make}; ac_make=`echo "$2" | sed 'y,:./+-,___p_,'` # make's name with : . / + - mapped so it can be part of a variable name
+if eval "test \"\${ac_cv_prog_make_${ac_make}_set+set}\" = set"; then # a cached answer for this make skips the probe
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.make <<\_ACEOF
+all:
+ @echo 'ac_maketemp="$(MAKE)"'
+_ACEOF
+# GNU make sometimes prints "make[1]: Entering...", which would confuse us.
+eval `${MAKE-make} -f conftest.make 2>/dev/null | grep temp=` # evaluates the printed ac_maketemp="..." assignment
+if test -n "$ac_maketemp"; then # non-empty means make expanded $(MAKE) itself
+ eval ac_cv_prog_make_${ac_make}_set=yes
+else
+ eval ac_cv_prog_make_${ac_make}_set=no
+fi
+rm -f conftest.make
+fi
+if eval "test \"`echo '$ac_cv_prog_make_'${ac_make}_set`\" = yes"; then
+ echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+ SET_MAKE= # make already defines MAKE; nothing to add
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+ SET_MAKE="MAKE=${MAKE-make}" # explicit MAKE= assignment for a make that does not define it
+fi
+
+
+if test $BUILD_PARSERS = true; then
+
+# Extract the first word of "flex", so it can be a program name with args.
+set dummy flex; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_FLEX+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$FLEX"; then
+ ac_cv_prog_FLEX="$FLEX" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_FLEX="flex"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+FLEX=$ac_cv_prog_FLEX
+if test -n "$FLEX"; then
+ echo "$as_me:$LINENO: result: $FLEX" >&5
+echo "${ECHO_T}$FLEX" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+if test -z "$FLEX"; then
+ echo
+ echo "error: flex is required to compile ragel"
+ echo
+ exit 1
+fi
+
+# Extract the first word of "gperf", so it can be a program name with args.
+set dummy gperf; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_GPERF+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$GPERF"; then
+ ac_cv_prog_GPERF="$GPERF" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_GPERF="gperf"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+GPERF=$ac_cv_prog_GPERF
+if test -n "$GPERF"; then
+ echo "$as_me:$LINENO: result: $GPERF" >&5
+echo "${ECHO_T}$GPERF" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+if test -z "$GPERF"; then
+ echo
+ echo "error: gperf is required to compile ragel"
+ echo
+ exit 1
+fi
+
+# Extract the first word of "bison", so it can be a program name with args.
+set dummy bison; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_BISON+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$BISON"; then
+ ac_cv_prog_BISON="$BISON" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_BISON="bison"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+BISON=$ac_cv_prog_BISON
+if test -n "$BISON"; then
+ echo "$as_me:$LINENO: result: $BISON" >&5
+echo "${ECHO_T}$BISON" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+if test -z "$BISON"; then
+ echo
+ echo "error: bison is required to compile ragel"
+ echo
+ exit 1
+fi
+
+if "$BISON" --version | grep 'bison++'; then
+ echo
+ echo "error: sorry, ragel cannot be compiled with bison++"
+ echo
+ exit 1
+fi
+
+fi # BUILD_PARSERS
+
+# Extract the first word of "gdc", so it can be a program name with args.
+set dummy gdc; ac_word=$2 # ac_word = "gdc", the name searched for below
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_GDC+set}" = set; then # a cached answer skips the PATH scan
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$GDC"; then
+ ac_cv_prog_GDC="$GDC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR # split $PATH on the path separator
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS # restore IFS inside the loop body
+ test -z "$as_dir" && as_dir=. # an empty PATH entry means the current directory
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_GDC="gdc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2 # leave both the extension loop and the PATH loop
+ fi
+done
+done
+
+fi
+fi
+GDC=$ac_cv_prog_GDC
+if test -n "$GDC"; then
+ echo "$as_me:$LINENO: result: $GDC" >&5
+echo "${ECHO_T}$GDC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+if test -n "$GDC"; then # append the define only when gdc was found; absence is not an error
+ cat >>confdefs.h <<_ACEOF
+#define GDC $GDC
+_ACEOF
+
+fi
+
+# Probe for Objective-C support by compiling a trivial program with
+# "gcc -x objective-c". On success GOBJC holds that command line and is
+# appended to confdefs.h; on failure GOBJC stays empty.
+echo "$as_me:$LINENO: checking for the Objective-C compiler" >&5
+echo $ECHO_N "checking for the Objective-C compiler... $ECHO_C" >&6
+cat > conftest.m <<EOF
+int main() { return 0; }
+EOF
+GOBJC=""
+if gcc -x objective-c conftest.m -o conftest.bin 2>/dev/null; then
+ GOBJC="gcc -x objective-c"
+ echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+ cat >>confdefs.h <<_ACEOF
+#define GOBJC $GOBJC
+_ACEOF
+
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+# Remove the probe's scratch files now rather than leaving them for later
+# checks to trip over.
+rm -f conftest.m conftest.bin
+
+
+# Extract the first word of "javac", so it can be a program name with args.
+set dummy javac; ac_word=$2 # ac_word = "javac", the name searched for below
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_JAVAC+set}" = set; then # a cached answer skips the PATH scan
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$JAVAC"; then
+ ac_cv_prog_JAVAC="$JAVAC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR # split $PATH on the path separator
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS # restore IFS inside the loop body
+ test -z "$as_dir" && as_dir=. # an empty PATH entry means the current directory
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_JAVAC="javac"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2 # leave both the extension loop and the PATH loop
+ fi
+done
+done
+
+fi
+fi
+JAVAC=$ac_cv_prog_JAVAC
+if test -n "$JAVAC"; then
+ echo "$as_me:$LINENO: result: $JAVAC" >&5
+echo "${ECHO_T}$JAVAC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+if test -n "$JAVAC"; then # append the define only when javac was found; absence is not an error
+ cat >>confdefs.h <<_ACEOF
+#define JAVAC $JAVAC
+_ACEOF
+
+fi
+
+# Extract the first word of "txl", so it can be a program name with args.
+set dummy txl; ac_word=$2 # ac_word = "txl", the name searched for below
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_TXL+set}" = set; then # a cached answer skips the PATH scan
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$TXL"; then
+ ac_cv_prog_TXL="$TXL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR # split $PATH on the path separator
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS # restore IFS inside the loop body
+ test -z "$as_dir" && as_dir=. # an empty PATH entry means the current directory
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_TXL="txl"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2 # leave both the extension loop and the PATH loop
+ fi
+done
+done
+
+fi
+fi
+TXL=$ac_cv_prog_TXL
+if test -n "$TXL"; then
+ echo "$as_me:$LINENO: result: $TXL" >&5
+echo "${ECHO_T}$TXL" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+if test -n "$TXL"; then # append the define only when txl was found; absence is not an error
+ cat >>confdefs.h <<_ACEOF
+#define TXL $TXL
+_ACEOF
+
+fi
+
+ ac_config_files="$ac_config_files Makefile common/Makefile ragel/Makefile rlcodegen/Makefile doc/Makefile test/Makefile"
+cat >confcache <<\_ACEOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs, see configure's option --config-cache.
+# It is not useful on other systems. If it contains results you don't
+# want to keep, you may remove or edit it.
+#
+# config.status only pays attention to the cache file if you give it
+# the --recheck option to rerun configure.
+#
+# `ac_cv_env_foo' variables (set or unset) will be overridden when
+# loading this file, other *unset* `ac_cv_foo' will be assigned the
+# following values.
+
+_ACEOF
+
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, don't put newlines in cache variables' values.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+{
+ (set) 2>&1 |
+ case `(ac_space=' '; set | grep ac_space) 2>&1` in
+ *ac_space=\ *)
+ # `set' does not quote correctly, so add quotes (double-quote
+ # substitution turns \\\\ into \\, and sed turns \\ into \).
+ sed -n \
+ "s/'/'\\\\''/g;
+ s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
+ ;;
+ *)
+ # `set' quotes correctly as required by POSIX, so do not add quotes.
+ sed -n \
+ "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p"
+ ;;
+ esac;
+} |
+ sed '
+ t clear
+ : clear
+ s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/
+ t end
+ /^ac_cv_env/!s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
+ : end' >>confcache
+if diff $cache_file confcache >/dev/null 2>&1; then :; else
+ if test -w $cache_file; then
+ test "x$cache_file" != "x/dev/null" && echo "updating cache $cache_file"
+ cat confcache >$cache_file
+ else
+ echo "not updating unwritable cache $cache_file"
+ fi
+fi
+rm -f confcache
+
+test "x$prefix" = xNONE && prefix=$ac_default_prefix
+# Let make expand exec_prefix.
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
+
+# VPATH may cause trouble with some makes, so we remove $(srcdir),
+# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and
+# trailing colons and then remove the whole line if VPATH becomes empty
+# (actually we leave an empty line to preserve line numbers).
+if test "x$srcdir" = x.; then
+ ac_vpsub='/^[ ]*VPATH[ ]*=/{
+s/:*\$(srcdir):*/:/;
+s/:*\${srcdir}:*/:/;
+s/:*@srcdir@:*/:/;
+s/^\([^=]*=[ ]*\):*/\1/;
+s/:*$//;
+s/^[^=]*=[ ]*$//;
+}'
+fi
+
+DEFS=-DHAVE_CONFIG_H # NOTE(review): presumably the other macros are delivered through a generated config header - confirm
+
+ac_libobjs= # rebuilt list of object names with the $U marker and object extension
+ac_ltlibobjs= # the same names with the .lo extension
+for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue # the leading ":" keeps the word list non-empty and is skipped
+ # 1. Remove the extension, and $U if already installed.
+ ac_i=`echo "$ac_i" |
+ sed 's/\$U\././;s/\.o$//;s/\.obj$//'`
+ # 2. Add them.
+ ac_libobjs="$ac_libobjs $ac_i\$U.$ac_objext"
+ ac_ltlibobjs="$ac_ltlibobjs $ac_i"'$U.lo'
+done
+LIBOBJS=$ac_libobjs
+
+LTLIBOBJS=$ac_ltlibobjs
+
+
+
+: ${CONFIG_STATUS=./config.status}
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files $CONFIG_STATUS"
+{ echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5
+echo "$as_me: creating $CONFIG_STATUS" >&6;}
+cat >$CONFIG_STATUS <<_ACEOF
+#! $SHELL
+# Generated by $as_me.
+# Run this file to recreate the current configuration.
+# Compiler output produced by configure, useful for debugging
+# configure, is in config.log if it exists.
+
+debug=false
+ac_cs_recheck=false
+ac_cs_silent=false
+SHELL=\${CONFIG_SHELL-$SHELL}
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+## --------------------- ##
+## M4sh Initialization. ##
+## --------------------- ##
+
+# Be Bourne compatible
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+ emulate sh
+ NULLCMD=:
+ # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then
+ set -o posix
+fi
+DUALCASE=1; export DUALCASE # for MKS sh
+
+# Support unset when possible.
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
+ as_unset=unset
+else
+ as_unset=false
+fi
+
+
+# Work around bugs in pre-3.0 UWIN ksh.
+$as_unset ENV MAIL MAILPATH
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+for as_var in \
+ LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \
+ LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \
+ LC_TELEPHONE LC_TIME
+do
+ if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then
+ eval $as_var=C; export $as_var
+ else
+ $as_unset $as_var
+ fi
+done
+
+# Required to use basename.
+if expr a : '\(a\)' >/dev/null 2>&1; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then
+ as_basename=basename
+else
+ as_basename=false
+fi
+
+
+# Name of the executable.
+as_me=`$as_basename "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)$' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X/"$0" |
+ sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; }
+ /^X\/\(\/\/\)$/{ s//\1/; q; }
+ /^X\/\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+
+
+# PATH needs CR, and LINENO needs CR and PATH.
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+ echo "#! /bin/sh" >conf$$.sh
+ echo "exit 0" >>conf$$.sh
+ chmod +x conf$$.sh
+ if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
+ PATH_SEPARATOR=';'
+ else
+ PATH_SEPARATOR=:
+ fi
+ rm -f conf$$.sh
+fi
+
+
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x$as_lineno_3" = "x$as_lineno_2" || {
+ # Find who we are. Look in the path if we contain no path at all
+ # relative or not.
+ case $0 in
+ *[\\/]* ) as_myself=$0 ;;
+ *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
+
+ ;;
+ esac
+ # We did not find ourselves, most probably we were run as `sh COMMAND'
+ # in which case we are not to be found in the path.
+ if test "x$as_myself" = x; then
+ as_myself=$0
+ fi
+ if test ! -f "$as_myself"; then
+ { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5
+echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;}
+ { (exit 1); exit 1; }; }
+ fi
+ case $CONFIG_SHELL in
+ '')
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for as_base in sh bash ksh sh5; do
+ case $as_dir in
+ /*)
+ if ("$as_dir/$as_base" -c '
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then
+ $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; }
+ $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; }
+ CONFIG_SHELL=$as_dir/$as_base
+ export CONFIG_SHELL
+ exec "$CONFIG_SHELL" "$0" ${1+"$@"}
+ fi;;
+ esac
+ done
+done
+;;
+ esac
+
+ # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
+ # uniformly replaced by the line number. The first 'sed' inserts a
+ # line-number line before each line; the second 'sed' does the real
+ # work. The second script uses 'N' to pair each line-number line
+ # with the numbered line, and appends trailing '-' during
+ # substitution so that $LINENO is not a special case at line end.
+ # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
+ # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-)
+ sed '=' <$as_myself |
+ sed '
+ N
+ s,$,-,
+ : loop
+ s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3,
+ t loop
+ s,-$,,
+ s,^['$as_cr_digits']*\n,,
+ ' >$as_me.lineno &&
+ chmod +x $as_me.lineno ||
+ { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5
+echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;}
+ { (exit 1); exit 1; }; }
+
+ # Don't try to exec as it changes $[0], causing all sort of problems
+ # (the dirname of $[0] is not the place where we might find the
+ # original and so on. Autoconf is especially sensible to this).
+ . ./$as_me.lineno
+ # Exit status is that of the last command.
+ exit
+}
+
+
+case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
+ *c*,-n*) ECHO_N= ECHO_C='
+' ECHO_T=' ' ;;
+ *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;;
+ *) ECHO_N= ECHO_C='\c' ECHO_T= ;;
+esac
+
+if expr a : '\(a\)' >/dev/null 2>&1; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+rm -f conf$$ conf$$.exe conf$$.file
+echo >conf$$.file
+if ln -s conf$$.file conf$$ 2>/dev/null; then
+ # We could just check for DJGPP; but this test a) works b) is more generic
+ # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04).
+ if test -f conf$$.exe; then
+ # Don't use ln at all; we don't have any links
+ as_ln_s='cp -p'
+ else
+ as_ln_s='ln -s'
+ fi
+elif ln conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s=ln
+else
+ as_ln_s='cp -p'
+fi
+rm -f conf$$ conf$$.exe conf$$.file
+
+if mkdir -p . 2>/dev/null; then
+ as_mkdir_p=:
+else
+ test -d ./-p && rmdir ./-p
+ as_mkdir_p=false
+fi
+
+as_executable_p="test -f"
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.
+as_nl='
+'
+IFS=" $as_nl"
+
+# CDPATH.
+$as_unset CDPATH
+
+exec 6>&1
+
+# Open the log real soon, to keep \$[0] and so on meaningful, and to
+# report actual input values of CONFIG_FILES etc. instead of their
+# values after options handling. Logging --version etc. is OK.
+exec 5>>config.log
+{
+ echo
+ sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
+## Running $as_me. ##
+_ASBOX
+} >&5
+cat >&5 <<_CSEOF
+
+This file was extended by $as_me, which was
+generated by GNU Autoconf 2.59. Invocation command line was
+
+ CONFIG_FILES = $CONFIG_FILES
+ CONFIG_HEADERS = $CONFIG_HEADERS
+ CONFIG_LINKS = $CONFIG_LINKS
+ CONFIG_COMMANDS = $CONFIG_COMMANDS
+ $ $0 $@
+
+_CSEOF
+echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5
+echo >&5
+_ACEOF
+
+# Files that config.status was made for.
+if test -n "$ac_config_files"; then
+ echo "config_files=\"$ac_config_files\"" >>$CONFIG_STATUS
+fi
+
+if test -n "$ac_config_headers"; then
+ echo "config_headers=\"$ac_config_headers\"" >>$CONFIG_STATUS
+fi
+
+if test -n "$ac_config_links"; then
+ echo "config_links=\"$ac_config_links\"" >>$CONFIG_STATUS
+fi
+
+if test -n "$ac_config_commands"; then
+ echo "config_commands=\"$ac_config_commands\"" >>$CONFIG_STATUS
+fi
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+
+ac_cs_usage="\
+\`$as_me' instantiates files from templates according to the
+current configuration.
+
+Usage: $0 [OPTIONS] [FILE]...
+
+ -h, --help print this help, then exit
+ -V, --version print version number, then exit
+ -q, --quiet do not print progress messages
+ -d, --debug don't remove temporary files
+ --recheck update $as_me by reconfiguring in the same conditions
+ --file=FILE[:TEMPLATE]
+ instantiate the configuration file FILE
+ --header=FILE[:TEMPLATE]
+ instantiate the configuration header FILE
+
+Configuration files:
+$config_files
+
+Configuration headers:
+$config_headers
+
+Report bugs to <bug-autoconf@gnu.org>."
+_ACEOF
+
+cat >>$CONFIG_STATUS <<_ACEOF
+ac_cs_version="\\
+config.status
+configured by $0, generated by GNU Autoconf 2.59,
+ with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"
+
+Copyright (C) 2003 Free Software Foundation, Inc.
+This config.status script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it."
+srcdir=$srcdir
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+# If no file are specified by the user, then we need to provide default
+# value. By we need to know if files were specified by the user.
+ac_need_defaults=:
+while test $# != 0
+do
+ case $1 in
+ --*=*)
+ ac_option=`expr "x$1" : 'x\([^=]*\)='`
+ ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'`
+ ac_shift=:
+ ;;
+ -*)
+ ac_option=$1
+ ac_optarg=$2
+ ac_shift=shift
+ ;;
+ *) # This is not an option, so the user has probably given explicit
+ # arguments.
+ ac_option=$1
+ ac_need_defaults=false;;
+ esac
+
+ case $ac_option in
+ # Handling of the options.
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+ ac_cs_recheck=: ;;
+ --version | --vers* | -V )
+ echo "$ac_cs_version"; exit 0 ;;
+ --he | --h)
+ # Conflict between --help and --header
+ { { echo "$as_me:$LINENO: error: ambiguous option: $1
+Try \`$0 --help' for more information." >&5
+echo "$as_me: error: ambiguous option: $1
+Try \`$0 --help' for more information." >&2;}
+ { (exit 1); exit 1; }; };;
+ --help | --hel | -h )
+ echo "$ac_cs_usage"; exit 0 ;;
+ --debug | --d* | -d )
+ debug=: ;;
+ --file | --fil | --fi | --f )
+ $ac_shift
+ CONFIG_FILES="$CONFIG_FILES $ac_optarg"
+ ac_need_defaults=false;;
+ --header | --heade | --head | --hea )
+ $ac_shift
+ CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg"
+ ac_need_defaults=false;;
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil | --si | --s)
+ ac_cs_silent=: ;;
+
+ # This is an error.
+ -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1
+Try \`$0 --help' for more information." >&5
+echo "$as_me: error: unrecognized option: $1
+Try \`$0 --help' for more information." >&2;}
+ { (exit 1); exit 1; }; } ;;
+
+ *) ac_config_targets="$ac_config_targets $1" ;;
+
+ esac
+ shift
+done
+
+ac_configure_extra_args=
+
+if $ac_cs_silent; then
+ exec 6>/dev/null
+ ac_configure_extra_args="$ac_configure_extra_args --silent"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF
+if \$ac_cs_recheck; then
+ echo "running $SHELL $0 " $ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6
+ exec $SHELL $0 $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+fi
+
+_ACEOF
+
+
+
+
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+for ac_config_target in $ac_config_targets
+do
+ case "$ac_config_target" in
+ # Handling of arguments.
+ "Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;;
+ "common/Makefile" ) CONFIG_FILES="$CONFIG_FILES common/Makefile" ;;
+ "ragel/Makefile" ) CONFIG_FILES="$CONFIG_FILES ragel/Makefile" ;;
+ "rlcodegen/Makefile" ) CONFIG_FILES="$CONFIG_FILES rlcodegen/Makefile" ;;
+ "doc/Makefile" ) CONFIG_FILES="$CONFIG_FILES doc/Makefile" ;;
+ "test/Makefile" ) CONFIG_FILES="$CONFIG_FILES test/Makefile" ;;
+ "common/config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS common/config.h" ;;
+ *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
+echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
+ { (exit 1); exit 1; }; };;
+ esac
+done
+
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used. Set only those that are not.
+# We use the long form for the default assignment because of an extremely
+# bizarre bug on SunOS 4.1.3.
+if $ac_need_defaults; then
+ test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
+ test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
+fi
+
+# Have a temporary directory for convenience. Make it in the build tree
+# simply because there is no reason to put it here, and in addition,
+# creating and moving files from /tmp can sometimes cause problems.
+# Create a temporary directory, and hook for its removal unless debugging.
+$debug ||
+{
+ trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0
+ trap '{ (exit 1); exit 1; }' 1 2 13 15
+}
+
+# Create a (secure) tmp directory for tmp files.
+
+{
+ tmp=`(umask 077 && mktemp -d -q "./confstatXXXXXX") 2>/dev/null` &&
+ test -n "$tmp" && test -d "$tmp"
+} ||
+{
+ tmp=./confstat$$-$RANDOM
+ (umask 077 && mkdir $tmp)
+} ||
+{
+ echo "$me: cannot create a temporary directory in ." >&2
+ { (exit 1); exit 1; }
+}
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<_ACEOF
+
+#
+# CONFIG_FILES section.
+#
+
+# No need to generate the scripts if there are no CONFIG_FILES.
+# This happens for instance when ./config.status config.h
+if test -n "\$CONFIG_FILES"; then
+ # Protect against being on the right side of a sed subst in config.status.
+ sed 's/,@/@@/; s/@,/@@/; s/,;t t\$/@;t t/; /@;t t\$/s/[\\\\&,]/\\\\&/g;
+ s/@@/,@/; s/@@/@,/; s/@;t t\$/,;t t/' >\$tmp/subs.sed <<\\CEOF
+s,@SHELL@,$SHELL,;t t
+s,@PATH_SEPARATOR@,$PATH_SEPARATOR,;t t
+s,@PACKAGE_NAME@,$PACKAGE_NAME,;t t
+s,@PACKAGE_TARNAME@,$PACKAGE_TARNAME,;t t
+s,@PACKAGE_VERSION@,$PACKAGE_VERSION,;t t
+s,@PACKAGE_STRING@,$PACKAGE_STRING,;t t
+s,@PACKAGE_BUGREPORT@,$PACKAGE_BUGREPORT,;t t
+s,@exec_prefix@,$exec_prefix,;t t
+s,@prefix@,$prefix,;t t
+s,@program_transform_name@,$program_transform_name,;t t
+s,@bindir@,$bindir,;t t
+s,@sbindir@,$sbindir,;t t
+s,@libexecdir@,$libexecdir,;t t
+s,@datadir@,$datadir,;t t
+s,@sysconfdir@,$sysconfdir,;t t
+s,@sharedstatedir@,$sharedstatedir,;t t
+s,@localstatedir@,$localstatedir,;t t
+s,@libdir@,$libdir,;t t
+s,@includedir@,$includedir,;t t
+s,@oldincludedir@,$oldincludedir,;t t
+s,@infodir@,$infodir,;t t
+s,@mandir@,$mandir,;t t
+s,@build_alias@,$build_alias,;t t
+s,@host_alias@,$host_alias,;t t
+s,@target_alias@,$target_alias,;t t
+s,@DEFS@,$DEFS,;t t
+s,@ECHO_C@,$ECHO_C,;t t
+s,@ECHO_N@,$ECHO_N,;t t
+s,@ECHO_T@,$ECHO_T,;t t
+s,@LIBS@,$LIBS,;t t
+s,@BUILD_PARSERS@,$BUILD_PARSERS,;t t
+s,@CC@,$CC,;t t
+s,@CFLAGS@,$CFLAGS,;t t
+s,@LDFLAGS@,$LDFLAGS,;t t
+s,@CPPFLAGS@,$CPPFLAGS,;t t
+s,@ac_ct_CC@,$ac_ct_CC,;t t
+s,@EXEEXT@,$EXEEXT,;t t
+s,@OBJEXT@,$OBJEXT,;t t
+s,@CXX@,$CXX,;t t
+s,@CXXFLAGS@,$CXXFLAGS,;t t
+s,@ac_ct_CXX@,$ac_ct_CXX,;t t
+s,@SET_MAKE@,$SET_MAKE,;t t
+s,@FLEX@,$FLEX,;t t
+s,@GPERF@,$GPERF,;t t
+s,@BISON@,$BISON,;t t
+s,@GDC@,$GDC,;t t
+s,@GOBJC@,$GOBJC,;t t
+s,@JAVAC@,$JAVAC,;t t
+s,@TXL@,$TXL,;t t
+s,@LIBOBJS@,$LIBOBJS,;t t
+s,@LTLIBOBJS@,$LTLIBOBJS,;t t
+CEOF
+
+_ACEOF
+
+ cat >>$CONFIG_STATUS <<\_ACEOF
+ # Split the substitutions into bite-sized pieces for seds with
+ # small command number limits, like on Digital OSF/1 and HP-UX.
+ ac_max_sed_lines=48
+ ac_sed_frag=1 # Number of current file.
+ ac_beg=1 # First line for current file.
+ ac_end=$ac_max_sed_lines # Line after last line for current file.
+ ac_more_lines=:
+ ac_sed_cmds=
+ while $ac_more_lines; do
+ if test $ac_beg -gt 1; then
+ sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
+ else
+ sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
+ fi
+ if test ! -s $tmp/subs.frag; then
+ ac_more_lines=false
+ else
+ # The purpose of the label and of the branching condition is to
+ # speed up the sed processing (if there are no `@' at all, there
+ # is no need to browse any of the substitutions).
+ # These are the two extra sed commands mentioned above.
+ (echo ':t
+ /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed
+ if test -z "$ac_sed_cmds"; then
+ ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed"
+ else
+ ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed"
+ fi
+ ac_sed_frag=`expr $ac_sed_frag + 1`
+ ac_beg=$ac_end
+ ac_end=`expr $ac_end + $ac_max_sed_lines`
+ fi
+ done
+ if test -z "$ac_sed_cmds"; then
+ ac_sed_cmds=cat
+ fi
+fi # test -n "$CONFIG_FILES"
+
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue
+ # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
+ case $ac_file in
+ - | *:- | *:-:* ) # input from stdin
+ cat >$tmp/stdin
+ ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+ *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+ * ) ac_file_in=$ac_file.in ;;
+ esac
+
+ # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories.
+ ac_dir=`(dirname "$ac_file") 2>/dev/null ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$ac_file" : 'X\(//\)[^/]' \| \
+ X"$ac_file" : 'X\(//\)$' \| \
+ X"$ac_file" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$ac_file" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+ { if $as_mkdir_p; then
+ mkdir -p "$ac_dir"
+ else
+ as_dir="$ac_dir"
+ as_dirs=
+ while test ! -d "$as_dir"; do
+ as_dirs="$as_dir $as_dirs"
+ as_dir=`(dirname "$as_dir") 2>/dev/null ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$as_dir" : 'X\(//\)[^/]' \| \
+ X"$as_dir" : 'X\(//\)$' \| \
+ X"$as_dir" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$as_dir" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+ done
+ test ! -n "$as_dirs" || mkdir $as_dirs
+ fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
+echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
+ { (exit 1); exit 1; }; }; }
+
+ ac_builddir=.
+
+if test "$ac_dir" != .; then
+ ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
+ # A "../" for each directory in $ac_dir_suffix.
+ ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
+else
+ ac_dir_suffix= ac_top_builddir=
+fi
+
+case $srcdir in
+ .) # No --srcdir option. We are building in place.
+ ac_srcdir=.
+ if test -z "$ac_top_builddir"; then
+ ac_top_srcdir=.
+ else
+ ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
+ fi ;;
+ [\\/]* | ?:[\\/]* ) # Absolute path.
+ ac_srcdir=$srcdir$ac_dir_suffix;
+ ac_top_srcdir=$srcdir ;;
+ *) # Relative path.
+ ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
+ ac_top_srcdir=$ac_top_builddir$srcdir ;;
+esac
+
+# Do not use `cd foo && pwd` to compute absolute paths, because
+# the directories may not exist.
+case `pwd` in
+.) ac_abs_builddir="$ac_dir";;
+*)
+ case "$ac_dir" in
+ .) ac_abs_builddir=`pwd`;;
+ [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";;
+ *) ac_abs_builddir=`pwd`/"$ac_dir";;
+ esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_top_builddir=${ac_top_builddir}.;;
+*)
+ case ${ac_top_builddir}. in
+ .) ac_abs_top_builddir=$ac_abs_builddir;;
+ [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;;
+ *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;;
+ esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_srcdir=$ac_srcdir;;
+*)
+ case $ac_srcdir in
+ .) ac_abs_srcdir=$ac_abs_builddir;;
+ [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;;
+ *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;;
+ esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_top_srcdir=$ac_top_srcdir;;
+*)
+ case $ac_top_srcdir in
+ .) ac_abs_top_srcdir=$ac_abs_builddir;;
+ [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;;
+ *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;;
+ esac;;
+esac
+
+
+
+ # Let's still pretend it is `configure' which instantiates (i.e., don't
+ # use $as_me), people would be surprised to read:
+ # /* config.h. Generated by config.status. */
+ if test x"$ac_file" = x-; then
+ configure_input=
+ else
+ configure_input="$ac_file. "
+ fi
+ configure_input=$configure_input"Generated from `echo $ac_file_in |
+ sed 's,.*/,,'` by configure."
+
+ # First look for the input files in the build tree, otherwise in the
+ # src tree.
+ ac_file_inputs=`IFS=:
+ for f in $ac_file_in; do
+ case $f in
+ -) echo $tmp/stdin ;;
+ [\\/$]*)
+ # Absolute (can't be DOS-style, as IFS=:)
+ test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+ { (exit 1); exit 1; }; }
+ echo "$f";;
+ *) # Relative
+ if test -f "$f"; then
+ # Build tree
+ echo "$f"
+ elif test -f "$srcdir/$f"; then
+ # Source tree
+ echo "$srcdir/$f"
+ else
+ # /dev/null tree
+ { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+ { (exit 1); exit 1; }; }
+ fi;;
+ esac
+ done` || { (exit 1); exit 1; }
+
+ if test x"$ac_file" != x-; then
+ { echo "$as_me:$LINENO: creating $ac_file" >&5
+echo "$as_me: creating $ac_file" >&6;}
+ rm -f "$ac_file"
+ fi
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF
+ sed "$ac_vpsub
+$extrasub
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+:t
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+s,@configure_input@,$configure_input,;t t
+s,@srcdir@,$ac_srcdir,;t t
+s,@abs_srcdir@,$ac_abs_srcdir,;t t
+s,@top_srcdir@,$ac_top_srcdir,;t t
+s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t
+s,@builddir@,$ac_builddir,;t t
+s,@abs_builddir@,$ac_abs_builddir,;t t
+s,@top_builddir@,$ac_top_builddir,;t t
+s,@abs_top_builddir@,$ac_abs_top_builddir,;t t
+" $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out
+ rm -f $tmp/stdin
+ if test x"$ac_file" != x-; then
+ mv $tmp/out $ac_file
+ else
+ cat $tmp/out
+ rm -f $tmp/out
+ fi
+
+done
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+
+#
+# CONFIG_HEADER section.
+#
+
+# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where
+# NAME is the cpp macro being defined and VALUE is the value it is being given.
+#
+# ac_d sets the value in "#define NAME VALUE" lines.
+ac_dA='s,^\([ ]*\)#\([ ]*define[ ][ ]*\)'
+ac_dB='[ ].*$,\1#\2'
+ac_dC=' '
+ac_dD=',;t'
+# ac_u turns "#undef NAME" without trailing blanks into "#define NAME VALUE".
+ac_uA='s,^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)'
+ac_uB='$,\1#\2define\3'
+ac_uC=' '
+ac_uD=',;t'
+
+for ac_file in : $CONFIG_HEADERS; do test "x$ac_file" = x: && continue
+ # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
+ case $ac_file in
+ - | *:- | *:-:* ) # input from stdin
+ cat >$tmp/stdin
+ ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+ *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+ * ) ac_file_in=$ac_file.in ;;
+ esac
+
+ test x"$ac_file" != x- && { echo "$as_me:$LINENO: creating $ac_file" >&5
+echo "$as_me: creating $ac_file" >&6;}
+
+ # First look for the input files in the build tree, otherwise in the
+ # src tree.
+ ac_file_inputs=`IFS=:
+ for f in $ac_file_in; do
+ case $f in
+ -) echo $tmp/stdin ;;
+ [\\/$]*)
+ # Absolute (can't be DOS-style, as IFS=:)
+ test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+ { (exit 1); exit 1; }; }
+ # Do quote $f, to prevent DOS paths from being IFS'd.
+ echo "$f";;
+ *) # Relative
+ if test -f "$f"; then
+ # Build tree
+ echo "$f"
+ elif test -f "$srcdir/$f"; then
+ # Source tree
+ echo "$srcdir/$f"
+ else
+ # /dev/null tree
+ { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+ { (exit 1); exit 1; }; }
+ fi;;
+ esac
+ done` || { (exit 1); exit 1; }
+ # Remove the trailing spaces.
+ sed 's/[ ]*$//' $ac_file_inputs >$tmp/in
+
+_ACEOF
+
+# Transform confdefs.h into two sed scripts, `conftest.defines' and
+# `conftest.undefs', that substitutes the proper values into
+# config.h.in to produce config.h. The first handles `#define'
+# templates, and the second `#undef' templates.
+# And first: Protect against being on the right side of a sed subst in
+# config.status. Protect against being in an unquoted here document
+# in config.status.
+rm -f conftest.defines conftest.undefs
+# Using a here document instead of a string reduces the quoting nightmare.
+# Putting comments in sed scripts is not portable.
+#
+# `end' is used to avoid that the second main sed command (meant for
+# 0-ary CPP macros) applies to n-ary macro definitions.
+# See the Autoconf documentation for `clear'.
+cat >confdef2sed.sed <<\_ACEOF
+s/[\\&,]/\\&/g
+s,[\\$`],\\&,g
+t clear
+: clear
+s,^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*\)\(([^)]*)\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1\2${ac_dC}\3${ac_dD},gp
+t end
+s,^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD},gp
+: end
+_ACEOF
+# If some macros were called several times there might be several times
+# the same #defines, which is useless. Nevertheless, we may not want to
+# sort them, since we want the *last* AC-DEFINE to be honored.
+uniq confdefs.h | sed -n -f confdef2sed.sed >conftest.defines
+sed 's/ac_d/ac_u/g' conftest.defines >conftest.undefs
+rm -f confdef2sed.sed
+
+# This sed command replaces #undef with comments. This is necessary, for
+# example, in the case of _POSIX_SOURCE, which is predefined and required
+# on some systems where configure will not decide to define it.
+cat >>conftest.undefs <<\_ACEOF
+s,^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*,/* & */,
+_ACEOF
+
+# Break up conftest.defines because some shells have a limit on the size
+# of here documents, and old seds have small limits too (100 cmds).
+echo ' # Handle all the #define templates only if necessary.' >>$CONFIG_STATUS
+echo ' if grep "^[ ]*#[ ]*define" $tmp/in >/dev/null; then' >>$CONFIG_STATUS
+echo ' # If there are no defines, we may have an empty if/fi' >>$CONFIG_STATUS
+echo ' :' >>$CONFIG_STATUS
+rm -f conftest.tail
+while grep . conftest.defines >/dev/null
+do
+ # Write a limited-size here document to $tmp/defines.sed.
+ echo ' cat >$tmp/defines.sed <<CEOF' >>$CONFIG_STATUS
+ # Speed up: don't consider the non `#define' lines.
+ echo '/^[ ]*#[ ]*define/!b' >>$CONFIG_STATUS
+ # Work around the forget-to-reset-the-flag bug.
+ echo 't clr' >>$CONFIG_STATUS
+ echo ': clr' >>$CONFIG_STATUS
+ sed ${ac_max_here_lines}q conftest.defines >>$CONFIG_STATUS
+ echo 'CEOF
+ sed -f $tmp/defines.sed $tmp/in >$tmp/out
+ rm -f $tmp/in
+ mv $tmp/out $tmp/in
+' >>$CONFIG_STATUS
+ sed 1,${ac_max_here_lines}d conftest.defines >conftest.tail
+ rm -f conftest.defines
+ mv conftest.tail conftest.defines
+done
+rm -f conftest.defines
+echo ' fi # grep' >>$CONFIG_STATUS
+echo >>$CONFIG_STATUS
+
+# Break up conftest.undefs because some shells have a limit on the size
+# of here documents, and old seds have small limits too (100 cmds).
+echo ' # Handle all the #undef templates' >>$CONFIG_STATUS
+rm -f conftest.tail
+while grep . conftest.undefs >/dev/null
+do
+ # Write a limited-size here document to $tmp/undefs.sed.
+ echo ' cat >$tmp/undefs.sed <<CEOF' >>$CONFIG_STATUS
+ # Speed up: don't consider the non `#undef'
+ echo '/^[ ]*#[ ]*undef/!b' >>$CONFIG_STATUS
+ # Work around the forget-to-reset-the-flag bug.
+ echo 't clr' >>$CONFIG_STATUS
+ echo ': clr' >>$CONFIG_STATUS
+ sed ${ac_max_here_lines}q conftest.undefs >>$CONFIG_STATUS
+ echo 'CEOF
+ sed -f $tmp/undefs.sed $tmp/in >$tmp/out
+ rm -f $tmp/in
+ mv $tmp/out $tmp/in
+' >>$CONFIG_STATUS
+ sed 1,${ac_max_here_lines}d conftest.undefs >conftest.tail
+ rm -f conftest.undefs
+ mv conftest.tail conftest.undefs
+done
+rm -f conftest.undefs
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+ # Let's still pretend it is `configure' which instantiates (i.e., don't
+ # use $as_me), people would be surprised to read:
+ # /* config.h. Generated by config.status. */
+ if test x"$ac_file" = x-; then
+ echo "/* Generated by configure. */" >$tmp/config.h
+ else
+ echo "/* $ac_file. Generated by configure. */" >$tmp/config.h
+ fi
+ cat $tmp/in >>$tmp/config.h
+ rm -f $tmp/in
+ if test x"$ac_file" != x-; then
+ if diff $ac_file $tmp/config.h >/dev/null 2>&1; then
+ { echo "$as_me:$LINENO: $ac_file is unchanged" >&5
+echo "$as_me: $ac_file is unchanged" >&6;}
+ else
+ ac_dir=`(dirname "$ac_file") 2>/dev/null ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$ac_file" : 'X\(//\)[^/]' \| \
+ X"$ac_file" : 'X\(//\)$' \| \
+ X"$ac_file" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$ac_file" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+ { if $as_mkdir_p; then
+ mkdir -p "$ac_dir"
+ else
+ as_dir="$ac_dir"
+ as_dirs=
+ while test ! -d "$as_dir"; do
+ as_dirs="$as_dir $as_dirs"
+ as_dir=`(dirname "$as_dir") 2>/dev/null ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$as_dir" : 'X\(//\)[^/]' \| \
+ X"$as_dir" : 'X\(//\)$' \| \
+ X"$as_dir" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$as_dir" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+ done
+ test ! -n "$as_dirs" || mkdir $as_dirs
+ fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
+echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
+ { (exit 1); exit 1; }; }; }
+
+ rm -f $ac_file
+ mv $tmp/config.h $ac_file
+ fi
+ else
+ cat $tmp/config.h
+ rm -f $tmp/config.h
+ fi
+done
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+
+{ (exit 0); exit 0; }
+_ACEOF
+chmod +x $CONFIG_STATUS
+ac_clean_files=$ac_clean_files_save
+
+
+# configure is writing to config.log, and then calls config.status.
+# config.status does its own redirection, appending to config.log.
+# Unfortunately, on DOS this fails, as config.log is still kept open
+# by configure, so config.status won't be able to write to it; its
+# output is simply discarded. So we exec the FD to /dev/null,
+# effectively closing config.log, so it can be properly (re)opened and
+# appended to by config.status. When coming back to configure, we
+# need to make the FD available again.
+if test "$no_create" != yes; then
+ ac_cs_success=:
+ ac_config_status_args=
+ test "$silent" = yes &&
+ ac_config_status_args="$ac_config_status_args --quiet"
+ exec 5>/dev/null
+ $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false
+ exec 5>>config.log
+ # Use ||, not &&, to avoid exiting from the if with $? = 1, which
+ # would make configure fail if this is the last instruction.
+ $ac_cs_success || { (exit 1); exit 1; }
+fi
+
+
+echo "configuration of ragel complete"
diff --git a/configure.in b/configure.in
new file mode 100644
index 0000000..a8e55e6
--- /dev/null
+++ b/configure.in
@@ -0,0 +1,118 @@
+dnl
+dnl Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+dnl
+
+dnl This file is part of Ragel.
+dnl
+dnl Ragel is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU General Public License as published by
+dnl the Free Software Foundation; either version 2 of the License, or
+dnl (at your option) any later version.
+dnl
+dnl Ragel is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+dnl GNU General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU General Public License
+dnl along with Ragel; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+AC_INIT(ragel/main.cpp)
+AC_CONFIG_HEADER(common/config.h)
+
+dnl Set true if build system should generate parsers from flex, bison, and
+dnl gperf sources. Set false if generated files are included and not to be
+dnl built (production).
+AC_SUBST(BUILD_PARSERS,true)
+
+dnl Find the C compiler and publish its name as the CC preprocessor define.
+AC_PROG_CC
+AC_DEFINE_UNQUOTED(CC,$CC)
+
+dnl Find the C++ compiler and publish its name as the CXX preprocessor define.
+AC_PROG_CXX
+AC_DEFINE_UNQUOTED(CXX,$CXX)
+
+dnl Run the remaining compile tests with the C++ compiler.
+AC_LANG_CPLUSPLUS
+
+dnl Check for definition of MAKE.
+AC_PROG_MAKE_SET
+
+dnl The parser generators are only needed when the parsers are built from
+dnl their flex, bison, and gperf sources. A missing tool is reported with
+dnl AC_MSG_ERROR so that the message goes to stderr and into config.log
+dnl before configure exits with a failure status.
+if test "$BUILD_PARSERS" = true; then
+
+dnl Check for flex
+AC_CHECK_PROG(FLEX, flex, flex)
+if test -z "$FLEX"; then
+  AC_MSG_ERROR([flex is required to compile ragel])
+fi
+
+dnl Check for gperf
+AC_CHECK_PROG(GPERF, gperf, gperf)
+if test -z "$GPERF"; then
+  AC_MSG_ERROR([gperf is required to compile ragel])
+fi
+
+dnl Check for bison
+AC_CHECK_PROG(BISON, bison, bison)
+if test -z "$BISON"; then
+  AC_MSG_ERROR([bison is required to compile ragel])
+fi
+
+dnl Sorry, Ragel will not compile with bison++. The version banner is only
+dnl inspected, so the matching line is discarded rather than shown to the user.
+if "$BISON" --version | grep 'bison++' >/dev/null; then
+  AC_MSG_ERROR([sorry, ragel cannot be compiled with bison++])
+fi
+
+fi # BUILD_PARSERS
+
+dnl Check for the D compiler. It is optional: GDC is only defined when a gdc
+dnl executable is found.
+AC_CHECK_PROG(GDC, gdc, gdc)
+if test -n "$GDC"; then
+  AC_DEFINE_UNQUOTED(GDC,$GDC)
+fi
+
+dnl Check for the Objective-C compiler. The probe is written by hand: a
+dnl trivial program is compiled with gcc in Objective-C mode, and the scratch
+dnl files are removed right after the probe whether or not it succeeded.
+AC_MSG_CHECKING([for the Objective-C compiler])
+cat > conftest.m <<EOF
+int main() { return 0; }
+EOF
+GOBJC=""
+if gcc -x objective-c conftest.m -o conftest.bin 2>/dev/null; then
+  GOBJC="gcc -x objective-c"
+  AC_MSG_RESULT([yes])
+  AC_DEFINE_UNQUOTED(GOBJC,$GOBJC)
+else
+  AC_MSG_RESULT([no])
+fi
+rm -f conftest.m conftest.bin
+AC_SUBST(GOBJC)
+
+dnl Check for the Java compiler. It is optional: JAVAC is only defined when a
+dnl javac executable is found.
+AC_CHECK_PROG(JAVAC, javac, javac)
+if test -n "$JAVAC"; then
+  AC_DEFINE_UNQUOTED(JAVAC,$JAVAC)
+fi
+
+dnl Check for TXL. It is optional: TXL is only defined when a txl executable
+dnl is found.
+AC_CHECK_PROG(TXL, txl, txl)
+if test -n "$TXL"; then
+  AC_DEFINE_UNQUOTED(TXL,$TXL)
+fi
+
+dnl Write the output files. The header named in AC_CONFIG_HEADER above is
+dnl generated in the same step.
+AC_OUTPUT(Makefile common/Makefile ragel/Makefile rlcodegen/Makefile doc/Makefile test/Makefile)
+
+echo "configuration of ragel complete"
diff --git a/doc/Makefile.in b/doc/Makefile.in
new file mode 100644
index 0000000..f52e021
--- /dev/null
+++ b/doc/Makefile.in
@@ -0,0 +1,73 @@
+#
+# Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+#
+
+# This file is part of Ragel.
+#
+# Ragel is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Ragel is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ragel; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+INPUT = version.tex ragel-guide.tex
+
+# Pick up all the figures in the current dir.
+FIGURES = $(wildcard *.fig)
+PDFFIGS = $(FIGURES:%.fig=%.pdf)
+
+# Get the version info.
+include ../version.mk
+
+# Install prefix. DESTDIR (empty by default) is prepended for staged installs.
+PREFIX = @prefix@
+
+# Rules. None of these four targets names a real file, so declare them phony.
+.PHONY: all clean distclean install
+all: ragel-guide.pdf ragel.1 rlcodegen.1
+
+ragel-guide.pdf: $(PDFFIGS) $(INPUT)
+
+%.pdf: %.fig
+	fig2dev -L pdf $< $@
+
+%.pdf: %.tex
+	pdflatex -interaction=nonstopmode $< >/dev/null
+	pdflatex -interaction=nonstopmode $< >/dev/null
+	pdflatex -interaction=nonstopmode $< >/dev/null
+
+# printf is used because some echo builtins expand escapes (\v in \version).
+version.tex: ../version.mk
+	printf '%s\n' '\def\version{$(VERSION)}' > version.tex
+	printf '%s\n' '\def\pubdate{$(PUBDATE)}' >> version.tex
+
+ragel.1: ragel.1.in ../version.mk
+	sed -e 's/@PUBDATE@/$(PUBDATE)/' -e 's/@VERSION@/$(VERSION)/' ragel.1.in > ragel.1
+
+rlcodegen.1: rlcodegen.1.in ../version.mk
+	sed -e 's/@PUBDATE@/$(PUBDATE)/' -e 's/@VERSION@/$(VERSION)/' rlcodegen.1.in > rlcodegen.1
+
+clean:
+	rm -f ragel.1 rlcodegen.1 version.tex \
+		*.bak *.aux *.dvi *.log *.toc *.pdf
+
+distclean: clean
+	rm -f Makefile
+
+install: all
+	install -d $(DESTDIR)$(PREFIX)/man/man1
+	install -m 644 ragel.1 $(DESTDIR)$(PREFIX)/man/man1/ragel.1
+	install -m 644 rlcodegen.1 $(DESTDIR)$(PREFIX)/man/man1/rlcodegen.1
+	install -d $(DESTDIR)$(PREFIX)/share/doc/ragel
+	install -m 644 ragel-guide.pdf $(DESTDIR)$(PREFIX)/share/doc/ragel/ragel-guide.pdf
+	gzip -c ../ChangeLog > ChangeLog.gz
+	install -m 644 ChangeLog.gz $(DESTDIR)$(PREFIX)/share/doc/ragel/ChangeLog.gz
+	rm ChangeLog.gz
diff --git a/doc/RELEASE_NOTES_V2 b/doc/RELEASE_NOTES_V2
new file mode 100644
index 0000000..1d03eda
--- /dev/null
+++ b/doc/RELEASE_NOTES_V2
@@ -0,0 +1,86 @@
+ Porting Ragel Programs to Version 2
+ ===================================
+
+
+1. Move all ?, +, and * operators to the right hand side of the operand.
+
+ float = *digit ?('.' +digit);
+
+ float = digit* ('.' digit+)?;
+
+2. Change all assignments to main from a definition using the = operator to an
+instantiation using the := operator.
+
+ main = 'hello';
+
+ main := 'hello';
+
+3. Remove $0 %! operations for clearing priorities.
+
+4. Anywhere implicit default priorities of zero are used to interact with
+explicitly set non-zero transitions, set the priorities to zero explicitly.
+
+ main := any* 'FIN' :1;
+
+ main := ( any $0 )* 'FIN' :1;
+
+5. If priorities need to interact across different machines, use a common name.
+Note that priority names default to the name of the machine they are assigned
+to.
+
+ wild = any*;
+ main := wild 'FIN' :1;
+
+ wild = ( any $0 )*;
+ main := wild 'FIN' :wild,1;
+
+6. If using clear keyword or operators modified with ^, duplicate the operand
+machines and rewrite them such that the cleared actions and suppressed out
+transitions and out priorities are removed.
+
+7. Change func keyword to action.
+
+8. Escape any - symbols and initial ^ symbol in or literals ([] outside of
+regular expressions).
+
+ main := [^#$-+*];
+
+ main := [\^#$\-+*];
+
+9. In C output, lowercase init, execute and finish routines and put an
+underscore in between the fsm name and the function name. Also qualify
+references to the fsm structure with the struct keyword.
+
+ fsm f;
+ fsmInit( &f );
+ fsmExecute( &f, buf, len );
+ fsmFinish( &f );
+
+ struct fsm f;
+ fsm_init( &f );
+ fsm_execute( &f, buf, len );
+ fsm_finish( &f );
+
+10. In C++ output, lowercase the init, execute and finish routines. Also make
+sure that the init routine is explicitly called.
+
+ fsm f;
+ f.Init();
+ f.Execute( buf, len );
+ f.Finish();
+
+ fsm f;
+ f.init();
+ f.execute( buf, len );
+ f.finish();
+
+11. Remove calls to the accept routine; instead examine the return value of the
+finish routine. If the machine does not accept then finish returns -1 or 0; if
+the machine accepts then finish returns 1.
+
+ f.finish();
+ if ( f.accept() )
+ cout << "ACCEPT" << endl;
+
+ if ( f.finish() > 0 )
+ cout << "ACCEPT" << endl;
diff --git a/doc/RELEASE_NOTES_V3 b/doc/RELEASE_NOTES_V3
new file mode 100644
index 0000000..64dd2f1
--- /dev/null
+++ b/doc/RELEASE_NOTES_V3
@@ -0,0 +1,8 @@
+ Porting Ragel Version 2 Programs to Version 3
+ =============================================
+
+1. Replace all instances of *p in action code with the keyword fc.
+
+2. Replace all instances of : used to set actions or priorities with @.
+
+3. Wrap named priorities in parentheses so they are of the form @(name,1).
diff --git a/doc/RELEASE_NOTES_V4 b/doc/RELEASE_NOTES_V4
new file mode 100644
index 0000000..a142f36
--- /dev/null
+++ b/doc/RELEASE_NOTES_V4
@@ -0,0 +1,361 @@
+
+ RELEASE NOTES Ragel 4.X
+
+
+To-State and From-State Action Embedding Operators Added (4.2)
+==============================================================
+
+Added operators for embedding actions into all transitions into a state and all
+transitions out of a state. These embeddings stay with the state, and are
+irrespective of what the current transitions are and any future transitions
+that may be added into or out of the state.
+
+In the following example act is executed on the transitions for 't' and 'y',
+even though it is only embedded in the context of the first alternative. This
+is because after matching 'hi ', the machine has not yet distinguished between
+the two threads. The machine is simultaneously in the state expecting 'there'
+and the state expecting 'you'.
+
+ action act {}
+ main :=
+ 'hi ' %*act 'there' |
+ 'hi you';
+
+The to-state action embedding operators embed into transitions that go into:
+>~ the start state
+$~ all states
+%~ final states
+<~ states that are not the start
+@~ states that are not final
+<@~ states that are not the start AND not final
+
+The from-state action embedding operators embed into transitions that leave:
+>* the start state
+$* all states
+%* final states
+<* states that are not the start
+@* states that are not final
+<@* states that are not the start AND not final
+
+Changed Operators for Embedding Context/Actions Into States (4.2)
+=================================================================
+
+The operators used to embed context and actions into states have been modified.
+The purpose of the modification is to make it easier to distribute actions to
+take among the states in a chain of concatenations such that each state has
+only a single action embedded. An example follows below.
+
+Now Gone:
+
+1. The use of >@ for selecting the states to modify (as in >@/ to embed eof
+ actions, etc) has been removed. This prefix meant start state OR not start AND
+ not final.
+
+2. The use of @% for selecting states to modify (as in @%/ to embed eof
+ actions, etc) has been removed. This prefix previously meant not start AND not
+ final OR final.
+
+Now Added:
+
+1. The prefix < which means not start.
+2. The prefix @ which means not final.
+3. The prefix <@ which means not start & not final.
+
+The new matrix of operators used to embed into states is:
+
+>: $: %: <: @: <@: - context
+>~ $~ %~ <~ @~ <@~ - to state action
+>* $* %* <* @* <@* - from state action
+>/ $/ %/ </ @/ <@/ - eof action
+>! $! %! <! @! <@! - error action
+>^ $^ %^ <^ @^ <@^ - local error action
+
+| | | | | |
+| | | | | *- not start & not final
+| | | | |
+| | | | *- not final
+| | | |
+| | | *- not start
+| | |
+| | *- final
+| |
+| *- all states
+|
+*- start state
+
+This example shows one way to use the new operators to cover all the states
+with a single action. The embedding of eof2 covers all the states in m2. The
+embeddings of eof1 and eof3 avoid the boundaries that m1 and m3 both share with
+m2.
+
+ action eof1 {}
+ action eof2 {}
+ action eof3 {}
+ m1 = 'm1';
+ m2 = ' '+;
+ m3 = 'm3';
+
+ main := m1 @/eof1 . m2 $/eof2 . m3 </eof3;
+
+Verbose Action, Priority and Context Embedding Added (4.2)
+==========================================================
+
+As an alternative to the symbol-based action, priority and context embedding
+operators, a more verbose form of embedding has been added. The general form of
+the verbose embedding is:
+
+ machine <- location [modifier] embedding_type value
+
+For embeddings into transitions, the possible locations are:
+ enter -- entering transitions
+ all -- all transitions
+ finish -- transitions into a final state
+ leave -- pending transitions out of the final states
+
+For embeddings into states, the possible locations are:
+ start -- the start state
+ all -- all states
+ final -- final states
+ !start -- all states except the start
+ !final -- states that are not final
+ !start !final -- states that are not the start and not final
+
+The embedding types are:
+ exec -- an action into transitions
+ pri -- a priority into transitions
+ ctx -- a named context into a state
+ into -- an action into all transitions into a state
+ from -- an action into all transitions out of a state
+ err -- an error action into a state
+ lerr -- a local error action into a state
+
+The possible modifiers:
+ on name -- specify a name for priority and local error embedding
+
+Character-Level Negation '^' Added (4.1)
+========================================
+
+A character-level negation operator ^ was added. This operator has the same
+precedence level as !. It is used to match single characters that are not
+matched by the machine it operates on. The expression ^m is equivalent to
+(any-(m)). This machine makes sense only when applied to machines that match
+single characters. Since subtraction is essentially a set difference, any
+strings matched by m that are not of length 1 will be ignored by the
+subtraction and have no effect.
+
+Discontinued Plus Sign To Specify Positive Literal Numbers (4.1)
+================================================================
+
+The use of + to specify a literal number as positive has been removed. This
+notation is redundant because all literals are positive by default. It was
+unlikely to be used but was provided for consistency. This notation caused an
+ambiguity with the '+' repetition operator. Due to this ambiguity, and the fact
+that it is unlikely to be used and is completely unnecessary when it is, it has
+been removed. This simplifies the design. It eliminates possible confusion and
+removes the need to explain why the ambiguity exists and how it is resolved.
+
+As a consequence of the removal, any expression (m +1) or (m+1) will now be
+parsed as (m+ . 1) rather than (m . +1). This is because previously the scanner
+handled positive literals and therefore they got precedence over the repetition
+operator.
+
+Precedence of Subtraction Operator vs Negative Literals Changed (4.1)
+=====================================================================
+
+Previously, the scanner located negative numbers and therefore gave a higher
+priority to the use of - to specify a negative literal number. This has
+changed, precedence is now given to the subtraction operator.
+
+This change is for two reasons: A) The subtraction operator is far more common
+than negative literal numbers. I have quite often been fooled by writing
+(any-0) and having it parsed as ( any . -0 ) rather than ( any - 0 ) as I
+wanted. B) In the definition of concatenation I want to maintain that
+concatenation is used only when there are no other binary operators separating
+two machines. In the case of (any-0) there is an operator separating the
+machines and parsing this as the concatenation of (any . -0) violates this
+rule.
+
+Duplicate Actions are Removed From Action Lists (4.1)
+=====================================================
+
+With previous versions of Ragel, effort was often expended towards ensuring
+identical machines were not unioned together, causing duplicate actions to
+appear in the same action list (transition or eof perhaps). Often this required
+factoring out a machine or specializing a machine's purpose. For example,
+consider the following machine:
+
+ word = [a-z]+ >s $a %l;
+ main :=
+ ( word ' ' word ) |
+ ( word '\t' word );
+
+This machine needed to be rewritten as the following to avoid duplicate
+actions. This is essentially a refactoring of the machine.
+
+ main := word ( ' ' | '\t' ) word;
+
+An alternative was to specialize the machines:
+
+ word1 = [a-z]+ >s $a %l;
+ word2 = [a-z]+;
+ main :=
+ ( word1 ' ' word1 ) |
+ ( word2 '\t' word1 );
+
+Since duplicating an action on a transition is never (in my experience) desired
+and must be manually avoided, sometimes to the point of obscuring the machine
+specification, it is now done automatically by Ragel. This change should have
+no effect on existing code that is properly written and will allow the
+programmer more freedom when writing new code.
+
+New Frontend (4.0)
+==================
+
+The syntax for embedding Ragel statements into the host language has changed.
+The primary motivation is a better interaction with Objective-C. Under the
+previous scheme Ragel generated the opening and closing of the structure and
+the interface. The user could inject user defined declarations into the struct
+using the struct {}; statement, however there was no way to inject interface
+declarations. Under this scheme it was also awkward to give the machine a base
+class. Rather than add another statement similar to struct for including
+declarations in the interface we take the reverse approach, the user now writes
+the struct and interface and Ragel statements are injected as needed.
+
+Machine specifications now begin with %% and are followed with an optional name
+and either a single ragel statement or a sequence of statements enclosed in {}.
+If a machine specification does not have a name then Ragel tries to find a name
+for it by first checking if the specification is inside a struct or class or
+interface. If it is not then it uses the name of the previous machine
+specification. If still no name is found then an error is raised.
+
+Since the user now specifies the fsm struct directly and since the current
+state and stack variables are now of type integer in all code styles, it is
+more appropriate for the user to manage the declarations of these variables.
+Ragel no longer generates the current state and the stack data variables. This
+also gives the user more freedom in deciding how the stack is to be allocated,
+and also permits it to be grown as necessary, rather than allowing only a fixed
+stack size.
+
+FSM specifications now persist in memory, so the second time a specification of
+any particular name is seen the statements will be added to the previous
+specification. Due to this it is no longer necessary to give the element or
+alphabet type in the header portion and in the code portion. In addition there
+is now an include statement that allows the inclusion of the header portion of
+a machine if it resides in a different file, as well as allowing the inclusion
+of a machine spec of a different name from any file at all.
+
+Ragel is still able to generate the machine's function declarations. This may
+not be required for C code, however this will be necessary for C++ and
+Objective-C code. This is now accomplished with the interface statement.
+
+Ragel now has different criteria for deciding what to generate. If the spec
+contains the interface statement then the machine's interface is generated. If
+the spec contains the definition of a main machine, then the code is generated.
+It is now possible to put common machine definitions into a separate library
+file and to include them in other machine specifications.
+
+To port Ragel 3.x programs to 4.x, the FSM's structure must be explicitly coded
+in the host language and it must include the declaration of current state. This
+should be called 'curs' and be of type int. If the machine uses the fcall
+and fret directives, the structure must also include the stack variables. The
+stack should be named 'stack' and be of type int*. The stack top should be
+named 'top' and be of type int.
+
+In Objective-C, both the interface and implementation directives must also
+be explicitly coded by the user. Examples can be found in the section "New
+Interface Examples".
+
+Action and Priority Embedding Operators (4.0)
+=============================================
+
+In the interest of simplifying the language, operators now embed strictly
+either on characters or on EOF, but never both. Operators should be doing one
+well-defined thing, rather than have multiple effects. This also enables the
+detection of FSM commands that do not make sense in EOF actions.
+
+This change is summarized by:
+ -'%' operator embeds only into leaving characters.
+ -All global and local error operators only embed on error character
+ transitions, their action will not be triggered on EOF in non-final states.
+ -Addition of EOF action embedding operators for all classes of states to make
+ up for functionality removed from other operators. These are >/ $/ @/ %/.
+ -Start transition operator '>' does not imply leaving transitions when start
+ state is final.
+
+This change results in a simpler and more direct relationship between the
+operators and the physical state machine entities they operate on. It removes
+the special cases within the operators that require you to stop and think as
+you program in Ragel.
+
+Previously, the pending out transition operator % simultaneously served two
+purposes. First, to embed actions that are to get transferred to transitions
+made going out of the machine. These transitions are created by the
+concatenation and kleene star operators. Second, to specify actions that get
+executed on EOF should the final state in the machine to which the operator is
+applied remain final.
+
+To convert Ragel 3.x programs: Any place where there is an embedding of an
+action into pending out transitions using the % operator and the final states
+remain final in the end result machine, add an embedding of the same action
+using the EOF operator %/action.
+
+Also note that when generating dot file output of a specific component of a
+machine that has leaving transitions embedded in the final states, these
+transitions will no longer show up since the leaving transition operator no
+longer causes actions to be moved into the EOF event when the state they are
+embedded into becomes a final state of the final machine.
+
+Const Element Type (4.0)
+========================
+
+If the element type has not been defined, the previous behaviour was to default
+to the alphabet type. The element type however is usually not specified as
+const and in most cases the data pointer in the machine's execute function
+should be a const pointer. Therefore ragel now makes the element type default
+to a constant version of the alphabet type. This can always be changed by using
+the element statement. For example 'element char;' will result in a non-const
+data pointer.
+
+New Interface Examples (4.0)
+============================
+
+---------- C ----------
+
+struct fsm
+{
+ int curs;
+};
+
+%% fsm
+{
+ main := 'hello world';
+}
+
+--------- C++ ---------
+
+struct fsm
+{
+ int curs;
+ %% interface;
+};
+
+%% main := 'hello world';
+
+----- Objective-C -----
+
+@interface Clang : Object
+{
+@public
+ int curs;
+};
+
+%% interface;
+
+@end
+
+@implementation Clang
+
+%% main := 'hello world';
+
+@end
+
diff --git a/doc/RELEASE_NOTES_V5 b/doc/RELEASE_NOTES_V5
new file mode 100644
index 0000000..15147d8
--- /dev/null
+++ b/doc/RELEASE_NOTES_V5
@@ -0,0 +1,112 @@
+
+ RELEASE NOTES Ragel 5.X
+
+This file describes the changes in Ragel version 5.X that are not backwards
+compatible. For a list of all the changes see the ChangeLog file.
+
+
+Interface to Host Programming Language
+======================================
+
+In version 5.0 there is a new interface to the host programming language.
+There are two major changes: the way Ragel specifications are embedded in the
+host program text, and the way that the host program interfaces with the
+generated code.
+
+Multiline Ragel specifications begin with '%%{' and end with '}%%'. Single line
+specifications start with '%%' and end at the first newline. Machine names are
+given with the machine statement at the very beginning of a machine spec. This
+change was made in order to make the task of separating Ragel code from the
+host code as straightforward as possible. This will ease the addition of more
+supported host languages.
+
+Ragel no longer parses structure and class names in order to infer machine
+names. Parsing structures and classes requires knowledge of the host language
+hardcoded into Ragel. Since Ragel is moving towards language independence, this
+feature has been removed.
+
+If a machine spec does not have a name then the previous spec name is used. If
+there is no previous specification then this is an error.
+
+The second major frontend change in 5.0 is doing away with the init(),
+execute() and finish() routines. Instead of generating these functions Ragel
+now only generates their contents. This scheme is more flexible, allowing the
+user to use a single function to drive the machine or separate out the
+different tasks if desired. It also frees the user from having to build the
+machine around a structure or a class.
+
+An example machine is:
+
+--------------------------
+
+%%{
+ machine fsm;
+ main := 'hello world';
+}%%
+
+%% write data;
+
+int parse( char *p )
+{
+ int cs;
+ char *pe = p + strlen(p);
+ %%{
+ write init;
+ write exec;
+ }%%
+ return cs;
+};
+
+--------------------------
+
+The generated code expects certain variables to be available, in some cases
+only if the corresponding features are used.
+
+ el* p: A pointer to the data to parse.
+ el* pe: A pointer to one past the last item.
+ int cs: The current state.
+ el* tokstart: The beginning of current match of longest match machines.
+ el* tokend: The end of the current match.
+ int act: The longest match pattern that has been matched.
+ int stack[n]: The stack for machine call statements
+ int top: The top of the stack for machine call statements
+
+It is possible to specify to Ragel how the generated code should access all the
+variables except p and pe by using the access statement.
+
+ access some_pointer->;
+ access variable_name_prefix;
+
+The writing statements are:
+
+ write data;
+ write init;
+ write exec;
+ write eof;
+
+There are some options available:
+
+ write data noerror nofinal noprefix;
+ write exec noend;
+
+ noerror: Do not write the id of the error state.
+ nofinal: Do not write the id of the first_final state.
+ noprefix: Do not prefix the variable with the name of the machine
+ noend: Do not test if the current character has reached pe. This is
+ useful if one wishes to break out of the machine using fbreak
+ when hitting some marker, such as the null character.
+
+The fexec Action Statement Changed
+==================================
+
+The fexec action statement has been changed to take only the new position to
+move to. This statement is more useful for moving backwards and reparsing input
+than for specifying a whole new buffer entirely and has been shifted to this
+new use. Also, using only a single argument simplifies the parsing of Ragel
+input files and will ease the addition of other host languages.
+
+Context Embedding Has Been Dropped
+==================================
+
+The context embedding operators were not carried over from version 4.X. Though
+interesting, they have not found any real practical use.
diff --git a/doc/bmconcat.fig b/doc/bmconcat.fig
new file mode 100644
index 0000000..a47f13b
--- /dev/null
+++ b/doc/bmconcat.fig
@@ -0,0 +1,40 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1440 450 135 135 1440 450 1575 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2115 450 135 135 2115 450 2250 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 765 450 135 135 765 450 900 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2790 450 135 135 2790 450 2925 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3465 450 135 135 3465 450 3600 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 4140 450 135 135 4140 450 4275 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 4140 450 90 90 4140 450 4230 450
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 900 450 1305 450
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1575 450 1980 450
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 2250 450 2655 450
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 2925 450 3330 450
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 3600 450 4005 450
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 225 495 315 360 405 630 495 450 540 450 630 450
+ 0.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 50 0 0 10 0.0000 4 105 75 1035 405 h\001
+4 0 0 50 0 0 10 0.0000 4 75 60 1710 405 e\001
+4 0 0 50 0 0 10 0.0000 4 105 60 2385 405 l\001
+4 0 0 50 0 0 10 0.0000 4 105 60 3060 405 l\001
+4 0 0 50 0 0 10 0.0000 4 75 75 3735 405 o\001
diff --git a/doc/bmnull.fig b/doc/bmnull.fig
new file mode 100644
index 0000000..1b85885
--- /dev/null
+++ b/doc/bmnull.fig
@@ -0,0 +1,15 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 765 450 135 135 765 450 900 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 765 450 90 90 765 450 855 450
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 225 495 315 360 405 630 495 450 540 450 630 450
+ 0.000 1.000 1.000 1.000 1.000 0.000
diff --git a/doc/bmnum.fig b/doc/bmnum.fig
new file mode 100644
index 0000000..5160114
--- /dev/null
+++ b/doc/bmnum.fig
@@ -0,0 +1,20 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 765 450 135 135 765 450 900 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1665 450 135 135 1665 450 1800 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1665 450 90 90 1665 450 1755 450
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 900 450 1530 450
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 225 495 315 360 405 630 495 450 540 450 630 450
+ 0.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 50 0 0 10 0.0000 4 75 270 1035 405 num\001
diff --git a/doc/bmor.fig b/doc/bmor.fig
new file mode 100644
index 0000000..69c6da0
--- /dev/null
+++ b/doc/bmor.fig
@@ -0,0 +1,28 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+5 1 0 2 0 7 50 0 -1 0.000 0 1 1 0 1327.500 103.500 810 585 1305 810 1845 585
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 1 1 0 1327.500 -472.500 900 495 1305 585 1755 495
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1327.500 796.500 810 315 1305 90 1845 315
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1327.500 1372.500 900 405 1305 315 1755 405
+ 1 1 2.00 60.00 60.00
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 765 450 135 135 765 450 900 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1890 450 90 90 1890 450 1980 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1890 450 135 135 1890 450 2025 450
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 225 495 315 360 405 630 495 450 540 450 630 450
+ 0.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 50 0 0 10 0.0000 4 105 75 1305 45 h\001
+4 0 0 50 0 0 10 0.0000 4 75 60 1305 270 e\001
+4 0 0 50 0 0 10 0.0000 4 105 60 1305 540 l\001
+4 0 0 50 0 0 10 0.0000 4 75 75 1305 765 o\001
diff --git a/doc/bmrange.fig b/doc/bmrange.fig
new file mode 100644
index 0000000..7ad3693
--- /dev/null
+++ b/doc/bmrange.fig
@@ -0,0 +1,20 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 765 450 135 135 765 450 900 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1710 450 135 135 1710 450 1845 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1710 450 90 90 1710 450 1800 450
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 900 450 1575 450
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 225 495 315 360 405 630 495 450 540 450 630 450
+ 0.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 50 0 0 10 0.0000 4 105 285 1080 405 l .. u\001
diff --git a/doc/bmregex.fig b/doc/bmregex.fig
new file mode 100644
index 0000000..5823524
--- /dev/null
+++ b/doc/bmregex.fig
@@ -0,0 +1,42 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 3420.000 240.000 3330 360 3420 90 3510 360
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1440.000 240.000 1350 360 1440 90 1530 360
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 2340.000 240.000 2250 360 2340 90 2430 360
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 2880.000 266.250 3375 585 2880 855 2385 585
+ 1 1 2.00 60.00 60.00
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 765 450 135 135 765 450 900 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1440 450 135 135 1440 450 1575 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2340 450 135 135 2340 450 2475 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3420 450 135 135 3420 450 3555 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3420 450 90 90 3420 450 3510 450
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 900 450 1305 450
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1575 450 2205 450
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 2475 450 3285 450
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 225 495 315 360 405 630 495 450 540 450 630 450
+ 0.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 50 0 0 10 0.0000 4 75 60 1035 405 a\001
+4 0 0 50 0 0 10 0.0000 4 105 75 1395 45 b\001
+4 0 0 50 0 12 10 0.0000 4 105 180 2250 45 df\001
+4 0 0 50 0 0 10 0.0000 4 135 315 2700 405 1,2,3\001
+4 0 0 50 0 0 10 0.0000 4 75 195 1800 405 c-z\001
+4 0 0 50 0 0 10 0.0000 4 135 315 3285 45 1,2,3\001
+4 0 0 50 0 12 10 0.0000 4 105 180 2790 810 df\001
diff --git a/doc/docbook.dsl b/doc/docbook.dsl
new file mode 100644
index 0000000..e8fabe0
--- /dev/null
+++ b/doc/docbook.dsl
@@ -0,0 +1,49 @@
+<!DOCTYPE style-sheet PUBLIC "-//James Clark//DTD DSSSL Style Sheet//EN" [
+<!ENTITY docbook.dsl PUBLIC
+ "-//Norman Walsh//DOCUMENT DocBook Print Stylesheet//EN" CDATA dsssl>
+]>
+
+<style-sheet>
+<style-specification use="docbook">
+<style-specification-body>
+
+;; your stuff goes here...
+
+(define %generate-article-titlepage% #t)
+(define %generate-article-toc% #t)
+(define %generate-article-titlepage-on-separate-page% #t)
+(define %generate-article-toc-on-titlepage% #f)
+(define %article-page-number-restart% #t)
+
+(define %chapter-autolabel% #t)
+(define %section-autolabel% #t)
+(define (toc-depth nd) 3)
+
+; === Media objects ===
+(define preferred-mediaobject-extensions ;; this magic allows to use different graphical
+ (list "eps")) ;; formats for printing and putting online
+(define acceptable-mediaobject-extensions
+ '())
+(define preferred-mediaobject-notations
+ (list "EPS"))
+(define acceptable-mediaobject-notations
+ (list "linespecific"))
+
+; === Rendering ===
+(define %head-after-factor% 0.2) ;; not much whitespace after orderedlist head
+(define ($paragraph$) ;; more whitespace after paragraph than before
+ (make paragraph
+ first-line-start-indent: (if (is-first-para)
+ %para-indent-firstpara%
+ %para-indent%)
+ space-before: (* %para-sep% 4)
+ space-after: (/ %para-sep% 4)
+ quadding: %default-quadding%
+ hyphenate?: %hyphenation%
+ language: (dsssl-language-code)
+ (process-children)))
+
+</style-specification-body>
+</style-specification>
+<external-specification id="docbook" document="docbook.dsl">
+</style-sheet>
diff --git a/doc/exaction.fig b/doc/exaction.fig
new file mode 100644
index 0000000..e41ef2e
--- /dev/null
+++ b/doc/exaction.fig
@@ -0,0 +1,37 @@
+#FIG 3.2 Produced by xfig version 3.2.5-alpha5
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1620.000 400.500 1530 495 1620 270 1710 495
+ 1 1 2.00 60.00 60.00
+6 1377 810 1872 990
+4 0 0 50 0 0 10 0.0000 4 120 315 1557 945 /C,N\001
+4 0 0 50 0 12 10 0.0000 4 105 180 1377 945 nl\001
+-6
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 585 135 135 585 585 720 585
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1620 585 135 135 1620 585 1755 585
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2655 585 135 135 2655 585 2790 585
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2655 585 90 90 2655 585 2745 585
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 720 585 1485 585
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1755 585 2520 585
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 45 630 135 495 225 765 315 585 360 585 450 585
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 630 720 765 900 1305 1035 1935 1035 2475 900 2610 720
+ 0.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 50 0 0 10 0.0000 4 120 495 855 540 a-z/A,B\001
+4 0 0 50 0 0 10 0.0000 4 105 330 1485 225 a-z/B\001
+4 0 0 50 0 12 10 0.0000 4 105 180 1935 540 nl\001
+4 0 0 50 0 0 10 0.0000 4 120 315 2115 540 /C,N\001
diff --git a/doc/exallact.fig b/doc/exallact.fig
new file mode 100644
index 0000000..40f4fcb
--- /dev/null
+++ b/doc/exallact.fig
@@ -0,0 +1,25 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 630 495 135 135 630 495 765 495
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1530 495 135 135 1530 495 1665 495
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2430 495 135 135 2430 495 2565 495
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2430 495 90 90 2430 495 2520 495
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 765 495 1395 495
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1665 495 2295 495
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 90 540 180 405 270 675 360 495 405 495 495 495
+ 0.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 50 0 0 10 0.0000 4 105 285 945 450 m/A\001
+4 0 0 50 0 0 10 0.0000 4 135 360 1800 450 1,2/A\001
diff --git a/doc/exallpri.fig b/doc/exallpri.fig
new file mode 100644
index 0000000..1b3a7ad
--- /dev/null
+++ b/doc/exallpri.fig
@@ -0,0 +1,33 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 630.000 825.000 540 945 630 675 720 945
+ 1 1 2.00 60.00 60.00
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 630 1035 135 135 630 1035 765 1035
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1305 1035 135 135 1305 1035 1440 1035
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1980 1035 135 135 1980 1035 2115 1035
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2655 1035 135 135 2655 1035 2790 1035
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2655 1035 90 90 2655 1035 2745 1035
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 765 1035 1170 1035
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1440 1035 1845 1035
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 2115 1035 2520 1035
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 90 1080 180 945 270 1215 360 1035 405 1035 495 1035
+ 0.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 50 0 12 10 0.0000 4 105 180 540 630 df\001
+4 0 0 50 0 0 10 0.0000 4 105 90 900 990 F\001
+4 0 0 50 0 0 10 0.0000 4 105 60 1575 990 I\001
+4 0 0 50 0 0 10 0.0000 4 105 120 2250 990 N\001
diff --git a/doc/exconcat.fig b/doc/exconcat.fig
new file mode 100644
index 0000000..21bf76f
--- /dev/null
+++ b/doc/exconcat.fig
@@ -0,0 +1,93 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 1080 135 135 1845 1080 1980 1080
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3105 1080 135 135 3105 1080 3240 1080
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3105 1080 90 90 3105 1080 3195 1080
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 1845 135 135 1845 1845 1980 1845
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1125 1575 135 135 1125 1575 1260 1575
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 1080 135 135 585 1080 720 1080
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2565 1575 135 135 2565 1575 2700 1575
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 720 1080 1710 1080
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1215 1485 1755 1170
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 675 1170 1035 1485
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1260 1620 1710 1800
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1845 1710 1845 1215
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 2655 1485 3015 1170
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 2970 1080 1980 1080
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1980 1800 2430 1620
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 2475 1485 1935 1170
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 5
+ 1 1 2.00 60.00 60.00
+ 1755 1935 1485 2115 900 2070 405 1530 495 1170
+ 0.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 5
+ 1 1 2.00 60.00 60.00
+ 1035 1665 945 1755 765 1755 540 1530 585 1215
+ 0.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 3
+ 1 1 2.00 60.00 60.00
+ 1755 990 1215 675 675 990
+ 0.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 450 1035 225 810 180 675 225 630 315 675 540 945
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 1800 945 1800 765 1800 675 1890 675 1890 810 1890 945
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 5
+ 1 1 2.00 60.00 60.00
+ 3105 945 3105 405 900 405 675 765 630 945
+ 0.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 45 1125 135 990 225 1260 315 1080 360 1080 450 1080
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 9
+ 1 1 2.00 60.00 60.00
+ 3105 1215 3105 1350 3060 1620 2880 1845 2565 2070 2115 2160
+ 1710 2115 1350 1980 1170 1710
+ 0.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
+ 0.000
+4 0 0 50 0 12 10 0.0000 4 105 180 675 1575 nl\001
+4 0 0 50 0 0 10 0.0000 4 105 90 855 1260 E\001
+4 0 0 50 0 12 10 0.0000 4 105 180 1125 720 nl\001
+4 0 0 50 0 12 10 0.0000 4 105 180 1125 1035 df\001
+4 0 0 50 0 12 10 0.0000 4 105 180 180 585 nl\001
+4 0 0 50 0 12 10 0.0000 4 105 180 1755 630 df\001
+4 0 0 50 0 12 10 0.0000 4 105 180 2475 1035 df\001
+4 0 0 50 0 12 10 0.0000 4 105 180 990 1980 nl\001
+4 0 0 50 0 12 10 0.0000 4 105 180 1755 360 nl\001
+4 0 0 50 0 12 10 0.0000 4 105 180 2205 1305 df\001
+4 0 0 50 0 12 10 0.0000 4 105 180 2655 1305 nl\001
+4 0 0 50 0 12 10 0.0000 4 105 180 1305 1305 df\001
+4 0 0 50 0 0 10 0.0000 4 105 105 1485 1665 O\001
+4 0 0 50 0 0 10 0.0000 4 105 90 2115 1665 F\001
+4 0 0 50 0 12 10 0.0000 4 105 180 1620 1485 df\001
+4 0 0 50 0 0 10 0.0000 4 105 90 2295 2025 E\001
diff --git a/doc/exdoneact.fig b/doc/exdoneact.fig
new file mode 100644
index 0000000..a9904af
--- /dev/null
+++ b/doc/exdoneact.fig
@@ -0,0 +1,24 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 630.000 310.500 540 405 630 180 720 405
+ 1 1 2.00 60.00 60.00
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 630 495 135 135 630 495 765 495
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1530 495 90 90 1530 495 1620 495
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1530 495 135 135 1530 495 1665 495
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 765 495 1395 495
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 90 540 180 405 270 675 360 495 405 495 495 495
+ 0.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 50 0 0 10 0.0000 4 75 195 540 135 a-z\001
+4 0 0 50 0 12 10 0.0000 4 105 180 900 450 sp\001
+4 0 0 50 0 0 10 0.0000 4 105 165 1080 450 /A\001
diff --git a/doc/exdonepri.fig b/doc/exdonepri.fig
new file mode 100644
index 0000000..a76a485
--- /dev/null
+++ b/doc/exdonepri.fig
@@ -0,0 +1,55 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 630 1035 135 135 630 1035 765 1035
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1305 1035 135 135 1305 1035 1440 1035
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1980 1035 135 135 1980 1035 2115 1035
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2655 1035 135 135 2655 1035 2790 1035
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2655 1035 90 90 2655 1035 2745 1035
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 765 1035 1170 1035
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1440 1035 1845 1035
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 2115 1035 2520 1035
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 90 1080 180 945 270 1215 360 1035 405 1035 495 1035
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 4
+ 1 1 2.00 60.00 60.00
+ 1215 1125 1080 1305 855 1305 720 1125
+ 0.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 1890 1125 1755 1350 1305 1485 810 1485 675 1350 630 1170
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 585 900 585 765 585 630 675 630 675 765 675 900
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 1260 900 1260 765 1260 630 1350 630 1350 765 1350 900
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 4
+ 1 1 2.00 60.00 60.00
+ 1890 945 1755 765 1530 765 1395 945
+ 0.000 1.000 1.000 0.000
+4 0 0 50 0 0 10 0.0000 4 105 90 900 990 F\001
+4 0 0 50 0 0 10 0.0000 4 105 120 2250 990 N\001
+4 0 0 50 0 12 10 0.0000 4 105 180 855 1215 df\001
+4 0 0 50 0 12 10 0.0000 4 105 180 1215 1395 df\001
+4 0 0 50 0 12 10 0.0000 4 105 180 540 585 df\001
+4 0 0 50 0 0 10 0.0000 4 105 90 1260 585 F\001
+4 0 0 50 0 0 10 0.0000 4 105 90 1620 720 F\001
+4 0 0 50 0 0 10 0.0000 4 105 60 1620 990 I\001
diff --git a/doc/exfinact.fig b/doc/exfinact.fig
new file mode 100644
index 0000000..3cb98c9
--- /dev/null
+++ b/doc/exfinact.fig
@@ -0,0 +1,29 @@
+#FIG 3.2 Produced by xfig version 3.2.5-alpha5
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1665.000 400.500 1575 495 1665 270 1755 495
+ 1 1 2.00 60.00 60.00
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2565 585 90 90 2565 585 2655 585
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2565 585 135 135 2565 585 2700 585
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 765 585 135 135 765 585 900 585
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1665 585 135 135 1665 585 1800 585
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 900 585 1530 585
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1800 585 2430 585
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 225 630 315 495 405 765 495 585 540 585 630 585
+ 0.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 50 0 12 10 0.0000 4 105 180 1980 540 nl\001
+4 0 0 50 0 0 10 0.0000 4 105 165 2160 540 /A\001
+4 0 0 50 0 0 10 0.0000 4 75 195 1080 540 a-z\001
+4 0 0 50 0 0 10 0.0000 4 75 195 1575 225 a-z\001
diff --git a/doc/exfinpri.fig b/doc/exfinpri.fig
new file mode 100644
index 0000000..947b29c
--- /dev/null
+++ b/doc/exfinpri.fig
@@ -0,0 +1,55 @@
+#FIG 3.2 Produced by xfig version 3.2.5-alpha5
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1665.000 378.000 1530 450 1665 225 1800 450
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 1 1 0 1174.891 998.804 1485 540 945 495 630 900
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1237.500 992.500 1485 1575 990 1575 630 1170
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1665.000 1323.000 1530 1395 1665 1170 1800 1395
+ 1 1 2.00 60.00 60.00
+6 720 225 1125 540
+4 0 0 50 0 0 10 0.3840 4 105 165 931 418 /A\001
+4 0 0 50 0 12 10 0.3840 4 105 180 763 485 sp\001
+-6
+6 855 1350 1215 1575
+4 0 0 50 0 12 10 5.8294 4 105 180 871 1429 sp\001
+4 0 0 50 0 0 10 5.8294 4 105 135 1033 1508 /B\001
+-6
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1665 585 135 135 1665 585 1800 585
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1665 585 180 180 1665 585 1845 585
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1665 1530 180 180 1665 1530 1845 1530
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1665 1530 135 135 1665 1530 1800 1530
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 630 1035 135 135 630 1035 765 1035
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 630 1035 90 90 630 1035 720 1035
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 720 945 1485 630
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 717 1118 1485 1485
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 90 1080 180 945 270 1215 360 1035 405 1035 495 1035
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 495 990 360 855 270 765 360 675 450 765 585 900
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 2 0 2 0 7 44 0 -1 0.000 0 1 0 4
+ 1 1 2.00 60.00 60.00
+ 1845 1530 2160 1305 2160 810 1845 585
+ 0.000 -1.000 -1.000 0.000
+4 0 0 50 0 12 10 0.0000 4 105 180 270 630 sp\001
+4 0 0 50 0 0 10 5.8818 4 105 210 1035 1215 0-9\001
+4 0 0 50 0 0 10 0.3840 4 75 195 945 810 a-z\001
+4 0 0 50 0 0 10 0.0000 4 120 450 1440 180 a-z,0-9\001
+4 0 0 50 0 0 10 0.0000 4 105 210 1530 1125 0-9\001
+4 0 0 50 0 0 10 0.0000 4 105 330 2295 1035 a-z/B\001
diff --git a/doc/exinter.fig b/doc/exinter.fig
new file mode 100644
index 0000000..51bc5df
--- /dev/null
+++ b/doc/exinter.fig
@@ -0,0 +1,48 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1125.000 1777.500 765 360 1125 315 1485 360
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 1 1 0 1125.000 -877.500 765 540 1125 585 1485 540
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 1 1 0 2025.000 -877.500 1665 540 2025 585 2385 540
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 1 1 0 2925.000 -877.500 2565 540 2925 585 3285 540
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 1 1 0 3825.000 -877.500 3465 540 3825 585 4185 540
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 2025.000 1777.500 1665 360 2025 315 2385 360
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 2925.000 1777.500 2565 360 2925 315 3285 360
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 3825.000 1777.500 3465 360 3825 315 4185 360
+ 1 1 2.00 60.00 60.00
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 675 450 135 135 675 450 810 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1575 450 135 135 1575 450 1710 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2475 450 135 135 2475 450 2610 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3375 450 135 135 3375 450 3510 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 4275 450 135 135 4275 450 4410 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 4275 450 90 90 4275 450 4365 450
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 135 495 225 360 315 630 405 450 450 450 540 450
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 5
+ 1 1 2.00 60.00 60.00
+ 4275 585 4320 990 2475 1215 630 990 675 585
+ 0.000 1.000 1.000 1.000 0.000
+4 0 0 50 0 12 10 0.0000 4 105 180 2385 1080 nl\001
+4 0 0 50 0 12 10 0.0000 4 105 180 1035 540 sp\001
+4 0 0 50 0 12 10 0.0000 4 105 180 1935 540 sp\001
+4 0 0 50 0 12 10 0.0000 4 105 180 2835 540 sp\001
+4 0 0 50 0 12 10 0.0000 4 105 180 3735 540 sp\001
+4 0 0 50 0 0 10 0.0000 4 75 195 3735 270 a-z\001
+4 0 0 50 0 0 10 0.0000 4 75 195 2835 270 a-z\001
+4 0 0 50 0 0 10 0.0000 4 75 195 1935 270 a-z\001
+4 0 0 50 0 0 10 0.0000 4 75 195 1035 270 a-z\001
diff --git a/doc/exnegate.fig b/doc/exnegate.fig
new file mode 100644
index 0000000..ceb4a90
--- /dev/null
+++ b/doc/exnegate.fig
@@ -0,0 +1,31 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+6 1350 180 1710 765
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1530.000 375.000 1440 495 1530 225 1620 495
+ 1 1 2.00 60.00 60.00
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1530 585 135 135 1530 585 1665 585
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1530 585 90 90 1530 585 1620 585
+-6
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 585 135 135 585 585 720 585
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 585 90 90 585 585 675 585
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 720 585 1395 585
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 585 450 900 135
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 45 630 135 495 225 765 315 585 360 585 450 585
+ 0.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 50 0 12 10 0.0000 4 105 180 1440 180 df\001
+4 0 0 50 0 12 10 0.0000 4 105 180 900 540 df\001
+4 0 0 50 0 0 10 0.7854 4 105 210 585 360 0-9\001
+4 0 0 50 0 22 10 0.0000 4 105 165 945 135 Err\001
diff --git a/doc/exoption.fig b/doc/exoption.fig
new file mode 100644
index 0000000..b59f46e
--- /dev/null
+++ b/doc/exoption.fig
@@ -0,0 +1,37 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1395.000 330.000 1305 450 1395 180 1485 450
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 3015.000 330.000 2925 450 3015 180 3105 450
+ 1 1 2.00 60.00 60.00
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 540 135 135 585 540 720 540
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1395 540 90 90 1395 540 1485 540
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1395 540 135 135 1395 540 1530 540
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2205 540 135 135 2205 540 2340 540
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3015 540 135 135 3015 540 3150 540
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3015 540 90 90 3015 540 3105 540
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 720 540 1260 540
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1530 540 2070 540
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 2340 540 2880 540
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 45 585 135 450 225 720 315 540 360 540 450 540
+ 0.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 50 0 0 10 0.0000 4 105 210 900 495 0-9\001
+4 0 0 50 0 0 10 0.0000 4 105 210 1305 135 0-9\001
+4 0 0 50 0 0 10 0.0000 4 15 45 1755 495 .\001
+4 0 0 50 0 0 10 0.0000 4 105 210 2520 495 0-9\001
+4 0 0 50 0 0 10 0.0000 4 105 210 2925 135 0-9\001
diff --git a/doc/exor.fig b/doc/exor.fig
new file mode 100644
index 0000000..5d30b16
--- /dev/null
+++ b/doc/exor.fig
@@ -0,0 +1,65 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 720 990 135 135 720 990 855 990
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1800 990 135 135 1800 990 1935 990
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1800 990 90 90 1800 990 1890 990
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1800 360 90 90 1800 360 1890 360
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1800 1620 90 90 1800 1620 1890 1620
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1800 360 135 135 1800 360 1935 360
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1800 1620 135 135 1800 1620 1935 1620
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2700 540 135 135 2700 540 2835 540
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3825 900 135 135 3825 900 3960 900
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3825 900 90 90 3825 900 3915 900
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 765 855 1665 360
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 855 990 1665 990
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 765 1125 1665 1620
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1935 360 2565 495
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 2835 585 3690 855
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1800 495 1800 855
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 180 1035 270 900 360 1170 450 990 495 990 585 990
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 1935 1665 2745 1665 2880 1665 2880 1575 2745 1575 1935 1575
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 1935 1035 2250 1035 2385 1035 2385 945 2250 945 1935 945
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 2 0 1 7 7 50 -1 -1 0.000 0 0 0 2
+ 4455 540 4455 1035
+ 0.000 0.000
+3 2 0 2 0 7 50 -1 -1 0.000 0 1 0 4
+ 1 1 2.00 60.00 60.00
+ 3690 945 3555 1305 4095 1305 3960 945
+ 0.000 -1.000 -1.000 0.000
+4 0 0 50 0 0 10 0.0000 4 105 195 1530 675 0-9\001
+4 0 0 50 0 0 10 0.0000 4 105 195 1125 945 1-9\001
+4 0 0 50 0 0 10 5.7770 4 120 435 1035 1215 a-z,A-Z\001
+4 0 0 50 0 0 10 0.5061 4 105 75 1080 630 0\001
+4 0 0 50 0 0 10 0.0000 4 105 195 2070 900 0-9\001
+4 0 0 50 0 0 10 0.0000 4 120 660 2070 1530 0-9,a-z,A-Z\001
+4 0 0 50 0 0 12 6.0214 4 75 90 2160 360 x\001
+4 0 0 50 0 0 10 5.9865 4 120 645 2925 540 0-9,a-f,A-F\001
+4 0 0 50 0 0 10 0.0000 4 120 645 3510 1575 0-9,a-f,A-F\001
diff --git a/doc/explus.fig b/doc/explus.fig
new file mode 100644
index 0000000..cb42300
--- /dev/null
+++ b/doc/explus.fig
@@ -0,0 +1,23 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1845.000 375.000 1755 495 1845 225 1935 495
+ 1 1 2.00 60.00 60.00
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 585 135 135 585 585 720 585
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 585 135 135 1845 585 1980 585
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 585 90 90 1845 585 1935 585
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 720 585 1710 585
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 45 630 135 495 225 765 315 585 360 585 450 585
+ 0.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 50 0 0 10 0.0000 4 135 765 810 540 0-9,a-z,A-Z\001
+4 0 0 50 0 0 10 0.0000 4 135 765 1485 180 0-9,a-z,A-Z\001
diff --git a/doc/exstact.fig b/doc/exstact.fig
new file mode 100644
index 0000000..699324e
--- /dev/null
+++ b/doc/exstact.fig
@@ -0,0 +1,33 @@
+#FIG 3.2 Produced by xfig version 3.2.5-alpha5
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1530.000 310.500 1440 405 1530 180 1620 405
+ 1 1 2.00 60.00 60.00
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 630 495 135 135 630 495 765 495
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1530 495 135 135 1530 495 1665 495
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2430 495 90 90 2430 495 2520 495
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2430 495 135 135 2430 495 2565 495
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 765 495 1395 495
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1665 495 2295 495
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 90 540 180 405 270 675 360 495 405 495 495 495
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 720 585 855 765 1215 900 1845 900 2205 765 2340 585
+ 0.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 50 0 0 10 0.0000 4 105 360 900 450 a-z/A\001
+4 0 0 50 0 12 10 0.0000 4 105 180 1890 450 sp\001
+4 0 0 50 0 12 10 0.0000 4 105 180 1440 810 sp\001
+4 0 0 50 0 0 10 0.0000 4 75 195 1427 127 a-z\001
diff --git a/doc/exstar.fig b/doc/exstar.fig
new file mode 100644
index 0000000..cca7963
--- /dev/null
+++ b/doc/exstar.fig
@@ -0,0 +1,32 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+5 1 0 2 0 7 50 0 -1 0.000 0 1 0 1 1035.000 -742.500 675 675 1035 720 1395 675
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1035.000 1912.500 675 495 1035 450 1395 495
+ 1 1 2.00 60.00 60.00
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 585 90 90 585 585 675 585
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 585 135 135 585 585 720 585
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1485 585 135 135 1485 585 1620 585
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 540 450 540 315 540 180 630 180 630 315 630 450
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 45 630 135 495 225 765 315 585 360 585 450 585
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 1440 450 1440 315 1440 180 1530 180 1530 315 1530 450
+ 0.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 50 0 0 10 0.0000 4 75 195 945 405 a-z\001
+4 0 0 50 0 12 10 0.0000 4 105 180 945 675 nl\001
+4 0 0 50 0 12 10 0.0000 4 105 180 495 135 nl\001
+4 0 0 50 0 0 10 0.0000 4 75 195 1395 135 a-z\001
diff --git a/doc/exstpri.fig b/doc/exstpri.fig
new file mode 100644
index 0000000..1b3a7ad
--- /dev/null
+++ b/doc/exstpri.fig
@@ -0,0 +1,33 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 630.000 825.000 540 945 630 675 720 945
+ 1 1 2.00 60.00 60.00
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 630 1035 135 135 630 1035 765 1035
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1305 1035 135 135 1305 1035 1440 1035
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1980 1035 135 135 1980 1035 2115 1035
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2655 1035 135 135 2655 1035 2790 1035
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2655 1035 90 90 2655 1035 2745 1035
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 765 1035 1170 1035
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1440 1035 1845 1035
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 2115 1035 2520 1035
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 90 1080 180 945 270 1215 360 1035 405 1035 495 1035
+ 0.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 50 0 12 10 0.0000 4 105 180 540 630 df\001
+4 0 0 50 0 0 10 0.0000 4 105 90 900 990 F\001
+4 0 0 50 0 0 10 0.0000 4 105 60 1575 990 I\001
+4 0 0 50 0 0 10 0.0000 4 105 120 2250 990 N\001
diff --git a/doc/exstrongsubtr.fig b/doc/exstrongsubtr.fig
new file mode 100644
index 0000000..1aca526
--- /dev/null
+++ b/doc/exstrongsubtr.fig
@@ -0,0 +1,65 @@
+#FIG 3.2 Produced by xfig version 3.2.5-alpha5
+Portrait
+Center
+Metric
+A4
+100.00
+Single
+-2
+# Generated by dot version 2.2.1 (Fri Sep 30 13:22:44 UTC 2005)
+# For: (age) Adrian Thurston,,,
+# Title: foo
+# Pages: 1
+1200 2
+0 32 #d2d2d2
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 1470.000 376.000 1380 496 1470 226 1560 496
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 2306.000 376.000 2216 496 2306 226 2396 496
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 3130.000 364.000 3040 484 3130 214 3220 484
+ 1 1 2.00 60.00 60.00
+5 1 0 2 0 7 50 0 -1 0.000 0 0 1 0 2721.519 538.911 3088 714 2714 945 2356 716
+ 1 1 2.00 60.00 60.00
+# 0
+1 1 0 2 0 7 50 0 -1 0.000 0 0.0000 603 591 135 135 603 591 738 591
+# 1
+1 1 0 2 0 7 50 0 -1 0.000 0 0.0000 1474 596 135 135 1474 596 1609 596
+# 2
+1 1 0 2 0 7 50 0 -1 0.000 0 0.0000 2311 590 135 135 2311 590 2446 590
+# 3
+1 1 0 2 0 7 50 0 -1 0.000 0 0.0000 3135 591 135 135 3135 591 3270 591
+1 1 0 2 0 7 50 0 -1 0.000 0 0.0000 3938 589 135 135 3938 589 4073 589
+# 4
+1 1 0 2 0 7 50 0 -1 0.000 0 0.0000 3938 589 90 90 3938 589 4028 589
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 67 640 157 505 247 775 337 595 382 595 472 595
+ 0.000 1.000 1.000 1.000 1.000 0.000
+# 0 -> 1
+3 4 0 2 0 7 50 0 -1 0.000 0 1 0 2
+ 1 1 2.00 60.00 60.00
+ 747 589 1341 592
+ 0.000 0.000
+# 1 -> 2
+3 4 0 2 0 7 50 0 -1 0.000 0 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1619 597 2179 594
+ 0.000 0.000
+# 1 -> 2
+3 4 0 2 0 7 50 0 -1 0.000 0 1 0 2
+ 1 1 2.00 60.00 60.00
+ 2457 590 3002 590
+ 0.000 0.000
+# 1 -> 2
+3 4 0 2 0 7 50 0 -1 0.000 0 1 0 2
+ 1 1 2.00 60.00 60.00
+ 3284 589 3810 588
+ 0.000 0.000
+4 0 0 50 0 0 10 0.0000 4 75 240 885 536 a..z\001
+4 0 0 50 0 12 10 0.0000 4 105 210 3451 538 nl\001
+4 0 0 50 0 12 10 0.0000 4 105 210 2209 190 df\001
+4 0 0 50 0 0 10 0.0000 4 75 45 1832 542 :\001
+4 0 0 50 0 12 10 0.0000 4 105 210 2624 893 df\001
+4 0 0 50 0 0 10 0.0000 4 75 240 1348 184 a..z\001
+4 0 0 50 0 12 10 0.0000 4 75 210 2610 540 cr\001
+4 0 0 50 0 12 10 0.0000 4 75 210 3015 180 cr\001
diff --git a/doc/exsubtr.fig b/doc/exsubtr.fig
new file mode 100644
index 0000000..0e35990
--- /dev/null
+++ b/doc/exsubtr.fig
@@ -0,0 +1,87 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+6 1395 270 3555 630
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1575 450 135 135 1575 450 1710 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2475 450 135 135 2475 450 2610 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3375 450 135 135 3375 450 3510 450
+-6
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 675 1215 135 135 675 1215 810 1215
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2475 1215 90 90 2475 1215 2565 1215
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2475 1215 135 135 2475 1215 2610 1215
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 3375 1980 135 135 3375 1980 3510 1980
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2475 1980 135 135 2475 1980 2610 1980
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1575 1980 135 135 1575 1980 1710 1980
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2475 450 90 90 2475 450 2565 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1575 450 90 90 1575 450 1665 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1575 1980 90 90 1575 1980 1665 1980
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2475 1980 90 90 2475 1980 2565 1980
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 720 1080 1440 450
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 720 1350 1440 1980
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 810 1215 2340 1215
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1665 540 2385 1125
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1665 1890 2385 1305
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 2475 1845 2475 1350
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1710 1980 2340 1980
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 2610 1980 3240 1980
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1710 450 2340 450
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 2610 450 3240 450
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 2475 585 2475 1080
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 3285 540 2565 1125
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 3285 1890 2565 1305
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 135 1260 225 1125 315 1395 405 1215 450 1215 540 1215
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 2610 1260 3015 1260 3150 1260 3150 1170 3015 1170 2610 1170
+ 0.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 50 0 0 10 0.0000 4 105 45 990 720 i\001
+4 0 0 50 0 0 10 0.0000 4 105 60 1125 1620 f\001
+4 0 0 50 0 0 10 0.0000 4 135 660 1215 1170 a-e,g-h,j-z\001
+4 0 0 50 0 0 10 0.6807 4 75 195 2880 810 a-z\001
+4 0 0 50 0 0 10 5.6025 4 75 195 2925 1530 a-z\001
+4 0 0 50 0 0 10 0.0000 4 75 210 2520 720 u-z\001
+4 0 0 50 0 0 10 0.0000 4 105 195 2205 1755 a-q\001
+4 0 0 50 0 0 10 0.0000 4 75 195 2520 1755 s-z\001
+4 0 0 50 0 0 10 0.0000 4 75 180 2205 720 a-s\001
+4 0 0 50 0 0 10 0.0000 4 75 75 1980 1935 o\001
+4 0 0 50 0 0 10 0.0000 4 75 60 2835 1935 r\001
+4 0 0 50 0 0 10 0.0000 4 90 60 2835 405 t\001
+4 0 0 50 0 0 10 0.0000 4 75 75 1935 405 n\001
+4 0 0 50 0 0 10 5.6025 4 105 495 1845 630 a-m,o-z\001
+4 0 0 50 0 0 10 0.6807 4 105 450 1800 1710 a-n,p-z\001
+4 0 0 50 0 0 10 0.0000 4 75 195 2835 1125 a-z\001
diff --git a/doc/opconcat.fig b/doc/opconcat.fig
new file mode 100644
index 0000000..312e301
--- /dev/null
+++ b/doc/opconcat.fig
@@ -0,0 +1,43 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+6 225 180 1530 1080
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 630 135 135 585 630 720 630
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1215 450 90 90 1215 450 1305 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1215 450 135 135 1215 450 1350 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1215 810 90 90 1215 810 1305 810
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1215 810 135 135 1215 810 1350 810
+3 1 0 1 0 7 50 0 -1 0.000 0 0 0 8
+ 225 630 495 270 1125 180 1485 270 1530 630 1485 990
+ 1125 1080 495 990
+ 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
+-6
+6 1980 180 3285 1080
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2970 450 90 90 2970 450 3060 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2970 450 135 135 2970 450 3105 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2340 630 135 135 2340 630 2475 630
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2970 810 135 135 2970 810 3105 810
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2970 810 90 90 2970 810 3060 810
+3 1 0 1 0 7 50 0 -1 0.000 0 0 0 8
+ 1980 630 2250 270 2880 180 3240 270 3285 630 3240 990
+ 2880 1080 2250 990
+ 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
+-6
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1350 810 2205 675
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1350 450 2205 585
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 45 675 135 540 225 810 315 630 360 630 450 630
+ 0.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 50 0 32 10 0.0000 4 75 75 1710 450 e\001
+4 0 0 50 0 32 10 0.0000 4 75 75 1710 900 e\001
diff --git a/doc/opor.fig b/doc/opor.fig
new file mode 100644
index 0000000..7dbb8ca
--- /dev/null
+++ b/doc/opor.fig
@@ -0,0 +1,42 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+6 0 765 765 1170
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 585 945 135 135 585 945 720 945
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 45 990 135 855 225 1125 315 945 360 945 450 945
+ 0.000 1.000 1.000 1.000 1.000 0.000
+-6
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1215 1440 135 135 1215 1440 1350 1440
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 1260 90 90 1845 1260 1935 1260
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 1260 135 135 1845 1260 1980 1260
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 270 90 90 1845 270 1935 270
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 270 135 135 1845 270 1980 270
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1215 450 135 135 1215 450 1350 450
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 630 135 135 1845 630 1980 630
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 630 90 90 1845 630 1935 630
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 1620 90 90 1845 1620 1935 1620
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1845 1620 135 135 1845 1620 1980 1620
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 675 855 1125 540
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 675 1035 1125 1350
+3 1 0 1 0 7 50 0 -1 0.000 0 0 0 8
+ 855 1440 1125 1080 1755 990 2115 1080 2160 1440 2115 1800
+ 1755 1890 1125 1800
+ 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
+3 1 0 1 0 7 50 0 -1 0.000 0 0 0 8
+ 855 450 1125 90 1755 0 2115 90 2160 450 2115 810
+ 1755 900 1125 810
+ 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
+4 0 0 50 0 32 10 0.0000 4 75 75 720 1260 e\001
+4 0 0 50 0 32 10 0.0000 4 75 75 720 720 e\001
diff --git a/doc/opstar.fig b/doc/opstar.fig
new file mode 100644
index 0000000..5bac654
--- /dev/null
+++ b/doc/opstar.fig
@@ -0,0 +1,49 @@
+#FIG 3.2 Produced by xfig version 3.2.5-alpha5
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+6 360 495 1125 900
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 945 675 135 135 945 675 1080 675
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 405 720 495 585 585 855 675 675 720 675 810 675
+ 0.000 1.000 1.000 1.000 1.000 0.000
+-6
+6 2070 135 2430 495
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2250 315 90 90 2250 315 2340 315
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2250 315 135 135 2250 315 2385 315
+-6
+6 969 -122 1329 238
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1149 58 90 90 1149 58 1239 58
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1149 58 135 135 1149 58 1284 58
+-6
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 1620 495 135 135 1620 495 1755 495
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2250 675 135 135 2250 675 2385 675
+1 3 0 2 0 7 50 0 -1 0.000 1 0.0000 2250 675 90 90 2250 675 2340 675
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 1080 630 1485 540
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 1 1 2.00 60.00 60.00
+ 973 543 1103 203
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 2385 360 2700 630 2700 1215 1980 1395 1260 1125 978 801
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 0 0 2 0 7 50 0 -1 0.000 0 1 0 6
+ 1 1 2.00 60.00 60.00
+ 2385 720 2520 855 2475 1125 1935 1215 1395 1035 1067 730
+ 0.000 1.000 1.000 1.000 1.000 0.000
+3 1 0 1 0 7 50 0 -1 0.000 0 0 0 8
+ 1260 495 1530 135 2160 45 2520 135 2565 495 2520 855
+ 2160 945 1530 855
+ 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
+4 0 0 50 0 32 10 0.0000 4 75 75 1845 1125 e\001
+4 0 0 50 0 32 10 0.0000 4 75 75 1845 1440 e\001
+4 0 0 50 0 32 10 0.0000 4 75 75 1156 549 e\001
+4 0 0 50 0 32 10 0.0000 4 75 75 896 442 e\001
diff --git a/doc/ragel-guide.tex b/doc/ragel-guide.tex
new file mode 100644
index 0000000..db5f88f
--- /dev/null
+++ b/doc/ragel-guide.tex
@@ -0,0 +1,2628 @@
+%
+% Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+%
+
+% This file is part of Ragel.
+%
+% Ragel is free software; you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 2 of the License, or
+% (at your option) any later version.
+%
+% Ragel is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with Ragel; if not, write to the Free Software
+% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+\documentclass[letterpaper,12pt,oneside]{book}
+\usepackage{pslatex}
+\usepackage{graphics}
+\usepackage{comment}
+\usepackage{multicol}
+\usepackage[medium]{titlesec}
+
+\topmargin 0in
+\oddsidemargin 0in
+\textwidth 6.5in
+\textheight 8.5in
+
+\setlength{\parskip}{0pt}
+\setlength{\topsep}{0pt}
+\setlength{\partopsep}{0pt}
+\setlength{\itemsep}{0pt}
+
+\input{version}
+
+\newcommand{\verbspace}{\vspace{10pt}}
+\newcommand{\graphspace}{\vspace{10pt}}
+
+\renewcommand\floatpagefraction{.99}
+\renewcommand\topfraction{.99}
+\renewcommand\bottomfraction{.99}
+\renewcommand\textfraction{.01}
+\setcounter{totalnumber}{50}
+\setcounter{topnumber}{50}
+\setcounter{bottomnumber}{50}
+
+\begin{document}
+
+%
+% Title page
+%
+\thispagestyle{empty}
+\begin{center}
+\vspace*{3in}
+{\huge Ragel State Machine Compiler}\\
+\vspace*{12pt}
+{\Large User Guide}\\
+\vspace{1in}
+by\\
+\vspace{12pt}
+{\large Adrian Thurston}\\
+\end{center}
+\clearpage
+
+\pagenumbering{roman}
+
+%
+% License page
+%
+\chapter*{License}
+Ragel version \version, \pubdate\\
+Copyright \copyright\ 2003, 2004, 2005, 2006 Adrian Thurston
+\vspace{6mm}
+
+{\bf\it\noindent This document is part of Ragel, and as such, this document is
+released under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2 of the License, or (at your option)
+any later version.}
+
+\vspace{5pt}
+
+{\bf\it\noindent Ragel is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+details.}
+
+\vspace{5pt}
+
+{\bf\it\noindent You should have received a copy of the GNU General Public
+License along with Ragel; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place, Suite 330, Boston, MA 02111-1307 USA}
+
+%
+% Table of contents
+%
+\clearpage
+\tableofcontents
+\clearpage
+
+%
+% Chapter 1
+%
+
+\pagenumbering{arabic}
+
+\chapter{Introduction}
+
+\section{Abstract}
+
+Regular expressions are used heavily in practice for the purpose of specifying
+parsers. However, they are normally used as black boxes linked together with
+program logic. User actions are associated with entire expressions and matched
+text is extracted from input. With these facilities it is not possible to
+specify an entire parser with a single regular expression because practical
+parsing tasks invariably involve the execution of arbitrary user code
+throughout the course of parsing.
+
+Ragel is a software development tool which allows the user to embed actions into
+regular expressions without disrupting the regular expression syntax.
+Consequently, one can specify an entire parser using a single regular
+expression. The single-expression model affords concise
+and elegant descriptions of languages and the generation of very simple,
+fast and robust code. Ragel compiles finite state machines from a high level
+regular language notation to executable C, C++, Objective-C or D.
+
+In addition to building state machines from regular expressions, Ragel allows
+the programmer to directly specify state machines with state charts. These two
+notations may also be freely combined. There are facilities for controlling
+nondeterminism in the resulting machines and building scanners using the
+longest-match paradigm. Ragel can produce code that runs as fast as manually
+constructed machines. Ragel can handle integer-sized alphabets and can compile
+very large state machines.
+
+\section{Motivation}
+
+When a programmer is faced with the task of producing a parser for a
+context-free language there are many tools to choose from. It is quite common
+to generate useful and efficient parsers for programming languages from a
+formal grammar. It is also quite common for programmers to avoid such tools
+when making parsers for simple computer languages, such as file formats and
+communication protocols. Such languages often meet the criteria for the
+regular languages. Tools for processing the context-free languages are simply
+too heavyweight for the purpose of parsing regular languages because the extra
+run-time effort required for supporting the recursive nature of context-free
+languages is wasted.
+
+Regular expressions are more appropriate than context-free grammars for a large
+number of parsing problems. Parsers based on them have many advantages over
+hand written parsers. Regular expression syntax is convenient,
+concise and easy to maintain. Existing
+parsing tools based on regular expressions, such as Lex, Re2C, Sed, Awk and
+Perl, are normally split into two levels: a regular expression matching engine
+and some kind of program logic for linking patterns together and executing user
+code.
+
+As an example, Lex requires the user to consider a language as a sequence
+of independent patterns.
+Unfortunately, there are many computer languages that are considered regular,
+which do not fit this model. This model also places restrictions on when action
+code may be executed. Since action code can only be associated with complete
+patterns, if action code must be executed before an entire pattern is matched
+then the pattern must be broken into smaller units. Instead of being forced to
+disrupt the regular expression syntax, it is desirable to retain a single
+expression and embed code for performing actions directly into the transitions
+which move over the characters. After all we know the transitions are there.
+
+Perl allows one to link patterns together using arbitrary program code. This
+is very flexible and powerful, however we can be more concise, clear and robust
+if we avoid gluing together regular expressions with if statements and while
+loops, and instead only compose parsers with regular expression operators. To
+achieve this we require an action execution model for associating code with the
+sub-expressions of a regular expression in a way that does not disrupt its
+syntax.
+
+The primary goal of Ragel is therefore to provide developers with an ability to embed
+actions into the transitions and states of a regular expression in support of the
+definition of entire parsers or large sections of parsers using a single
+regular expression that is compiled to a simple state machine. From the
+regular expression we gain a clear and concise statement of our language. From
+the state machine we obtain a very fast and robust executable that lends itself
+to many kinds of analysis and visualization.
+
+\section{Overview}
+
+Ragel is a language for specifying state machines. The Ragel program is a
+compiler that assembles a state machine definition to executable code. Ragel
+is based on the principle that any regular language can be converted to a
+deterministic finite state automaton. Since every regular language has a state
+machine representation and vice versa, the terms regular language and state
+machine (or just machine) will be used interchangeably in this document.
+
+Ragel outputs machines to C, C++, Objective-C, or D code. The output is
+designed to be generic and is not bound to any particular input or processing
+method. A Ragel machine expects to have data passed to it in buffer blocks.
+When there is no more input, the machine can be queried for acceptance. In
+this way, a Ragel machine can be used to simply recognize a regular language
+like a regular expression library. By embedding code into the regular language,
+a Ragel machine can also be used to parse input.
+
+The Ragel input language has many operators for constructing and manipulating
+machines. Machines are built up from smaller machines, to bigger ones, to the
+final machine representing the language that needs to be recognized or parsed.
+
+The core state machine construction operators are those found in most ``Theory
+of Computation'' textbooks. They date back to the 1950s and are widely studied.
+They are based on set operations and permit one to think of languages as a set
+of strings. They are Union, Intersection, Subtraction, Concatenation and Kleene
+Star. Put together, these operators make up what most people know as regular
+expressions. Ragel also provides a longest-match construction for easily
+building scanners and provides operators for explicitly constructing machines
+using a state chart method. In the state chart method one joins machines
+together without any implied transitions and then explicitly specifies where
+epsilon transitions should be drawn.
+
+The state machine manipulation operators are specific to Ragel. They allow the
+programmer to access the states and transitions of regular languages. There are
+two uses of the manipulation operators. The first and primary use is to embed
+code into transitions and states, allowing the programmer to specify the
+actions of the state machine.
+
+Following a number of action embeddings, a single transition can have a number
+of actions embedded in it. When making a nondeterministic specification into a
+DFA using machines that have embedded actions, new transitions are often made
+that have the combined actions of several source transitions. Ragel ensures
+that multiple actions associated with a single transition are ordered
+consistently with respect to the order of reference and the natural ordering
+implied by the construction operators.
+
+The second use of the manipulation operators is to assign priorities in
+transitions. Priorities provide a convenient way of controlling any
+nondeterminism introduced by the construction operators. Suppose two
+transitions leave from the same state and go to distinct target states on the
+same character. If these transitions are assigned conflicting priorities, then
+during the determinization process the transition with the higher priority will
+take precedence over the transition with the lower priority. The lower priority
+transition gets abandoned. The transitions would otherwise be combined to a new
+transition that goes to a new state which is a combination of the original
+target states. Priorities are often required for segmenting machines. The most
+common uses of priorities have been encoded into a set of simple operators
+which should be used instead of priority embeddings whenever possible.
+
+There are four operators for embedding actions and priorities into the
+transitions of a state machine; these correspond to the different
+classes of transitions in a machine. It is possible to embed into start
+transitions, finishing transitions, all transitions or pending out
+transitions. The embedding of pending out transitions is a special case.
+These transition embeddings get stored in the final states of a machine. They
+are transferred to any transitions that may be made going out of the machine by
+a concatenation or Kleene star operator.
+
+There are several more operators for embedding actions into states. Like the
+transition embeddings, there are various different classes of states that the
+embedding operators access. For example, one can access start states, final
+states or all states, among others. Unlike the transition
+embeddings, there
+are several different types of state action embeddings. These are executed at various
+different times during the processing of input. It is possible to embed
+actions which are executed on all transitions into a state, all transitions out of a state,
+transitions taken on the error event or on the EOF event.
+
+Within actions, it is possible to influence the behaviour of the state machine.
+The user can write action code that jumps or calls to another portion of the
+machine, changes the current character being processed, or breaks out of the
+processing loop. With the state machine calling feature Ragel can be used to
+parse languages which are not regular. For example, one can parse balanced
+parentheses by calling into a parser when an open bracket character is seen and
+returning to the state on the top of the stack when the corresponding closing
+bracket character is seen. More complicated context-free languages such as
+expressions in C, are out of the scope of Ragel.
+
+Ragel provides a longest-match construction operator which eases the task of
+building scanners. This construction behaves much like the primary processing
+model of Lex. The generated code, which relies on user-defined variables for
+backtracking, repeatedly tries to match patterns to the input, favouring longer
+patterns over shorter ones and patterns that appear ahead of others when the
+lengths of the possible matches are identical. When a pattern is matched the
+associated action is executed. Longest-match machines take Ragel out of the
+domain of pure state machines and require the user to maintain the backtracking
+related variables. However, longest-match machines integrate well with regular
+state machine instantiations. They can be called to or jumped to only when
+needed, or they can be called out of or jumped out of when a simpler, pure
+state machine model is needed.
+
+Two types of output code style are available. Ragel can produce a table-driven
+machine or a directly executable machine. The directly executable machine is much
+faster than the table-driven. On the other hand, the table-driven machine is
+more compact and less demanding on the host language compiler. It is better
+suited to compiling large state machines and in the future will be used for
+coverage statistics gathering and debugging.
+
+\section{Related Work}
+
+Lex is perhaps the best-known tool for constructing parsers from regular
+expressions. In the Lex processing model, generated code attempts to match one
+of the user's regular expression patterns, favouring longer matches over
+shorter ones. Once a match is made it then executes the code associated with
+the pattern and consumes the matching string. This process is repeated until
+the input is fully consumed.
+
+Through the use of start conditions, related sets of patterns may be defined.
+The active set may be changed at any time. This allows the user to define
+different lexical regions. It also allows the user to link patterns together by
+requiring that some patterns come before others. This is quite like a
+concatenation operation. However, use of Lex for languages that require a
+considerable amount of pattern concatenation is inappropriate. In such cases a
+Lex program deteriorates into a manually specified state machine, where start
+conditions define the states and pattern actions define the transitions. Lex
+is therefore best suited to parsing tasks where the language to be parsed can
+be described in terms of regions of tokens.
+
+Lex is useful in many scenarios and has undoubtedly stood the test of time.
+There are, however, several drawbacks to using Lex. Lex can impose too much
+overhead for parsing applications where buffering is not required because all
+the characters are available in a single string. In these cases there is
+structure to the language to be parsed and a parser specification tool can
+help, but employing a heavyweight processing loop that imposes a stream
+``pull'' model and dynamic input buffer allocation is inappropriate. An
+example of this kind of scenario is the conversion of floating point numbers
+contained in a string to their corresponding numerical values.
+
+Another drawback is that
+Lex patterns are black boxes. It is not possible to execute a user action while
+matching a character contained inside a pattern. For example, if scanning a
+programming language and string literals can contain newlines which must be
+counted, a Lex user must break up a string literal pattern so as to associate
+an action with newlines. This forces the definition of a new start condition.
+Alternatively the user can reprocess the text of the matched string literal to
+count newlines.
+
+\begin{comment}
+How ragel is different from Lex.
+
+%Like Re2c, Ragel provides a simple execution model that does not make any
+%assumptions as to how the input is collected. Also, Ragel does not do any
+%buffering in the generated code. Consequently there are no dependencies on
+%external functions such as \verb|malloc|.
+
+%If buffering is required it can be manually implemented by embedding actions
+%that copy the current character to a buffer, or data can be passed to the
+%parser using known block boundaries. If the longest-match operator is used,
+%Ragel requires the user to ensure that the ending portion of the input buffer
+%is preserved when the buffer is exhausted before a token is fully matched. The
+%user should move the token prefix to a new memory location, such as back to the
+%beginning of the input buffer, then place the subsequently read input
+%immediately after the prefix.
+
+%These properties of Ragel make it more work to write a program that requires
+%the longest-match operator or buffering of input, however they make Ragel a
+%more flexible tool that can produce very simple and fast-running programs under
+%a variety of input acquisition arrangements.
+
+%In Ragel, it is not necessary
+%to introduce start conditions to concatenate tokens and retain action
+%execution. Ragel allows one to structure a parser as a series of tokens, but
+%does not require it.
+
+%Like Lex and Re2C, Ragel is able to process input using a longest-match
+%execution model, however the core of the Ragel language specifies parsers at a
+%much lower level. This core is built around a pure state machine model. When
+%building basic machines there is no implied algorithm for processing input
+%other than to move from state to state on the transitions of the machine. This
+%core of pure state machine operations makes Ragel well suited to handling
+%parsing problems not based on token scanning. Should one need to use a
+%longest-match model, the functionality is available and the lower level state
+%machine construction facilities can be used to specify the patterns of a
+%longest-match machine.
+
+%This is not possible in Ragel. One can only program
+%a longest-match instantiation with a fixed set of rules. One can jump to
+%another longest-match machine that employs the same machine definitions in the
+%construction of its rules, however no states will be shared.
+
+%In Ragel, input may be re-parsed using a
+%different machine, but since the action to be executed is associated with
+%transitions of the compiled state machine, the longest-match construction does
+%not permit a single rule to be excluded from the active set. It cannot be done
+%ahead of time nor in the excluded rule's action.
+\end{comment}
+
+The Re2C program defines an input processing model similar to that of Lex.
+Unlike Lex, Re2C focuses on making generated state machines run very fast and
+integrate easily into any program, free of dependencies. Re2C generates
+directly executable code and is able to claim that generated parsers run nearly
+as fast as their hand-coded equivalents. This is very important for user
+adoption, as programmers are reluctant to use a tool when a faster alternative
+exists. Ease of use is also an important consideration because developers
+need the freedom to integrate the generated code as they see fit.
+
+Many scripting languages provide ways of composing parsers by linking regular
+expressions using program logic. For example, Sed and Awk are two established
+Unix scripting tools that allow the programmer to exploit regular expressions
+for the purpose of locating and extracting text of interest. High-level
+programming languages such as Perl, Python, PHP and Ruby all provide regular
+expression libraries that allow the user to combine regular expressions with
+arbitrary code.
+
+In addition to supporting the linking of regular expressions with arbitrary
+program logic, the Perl programming language permits the embedding of code into
+regular expressions. Perl embeddings do not translate into the embedding of
+code into deterministic state machines. Perl regular expressions are in fact
+not fully compiled to deterministic machines when embedded code is involved.
+They are instead interpreted and involve backtracking. This is shown by the
+following Perl program. When it is fed the input \verb|abcd| the interpreter
+attempts to match the first alternative, printing \verb|a1 b1|. When this
+possibility fails it backtracks and tries the second possibility, printing
+\verb|a2 b2|, at which point it succeeds. A similar parser expressed in Ragel
+will attempt both of the alternatives concurrently, printing
+\verb|a1 a2 b1 b2|.
+
+\verbspace
+\begin{verbatim}
+print "YES\n" if ( <STDIN> =~
+ /( a (?{ print "a1 "; }) b (?{ print "b1 "; }) cX ) |
+ ( a (?{ print "a2 "; }) b (?{ print "b2 "; }) cd )/x )
+\end{verbatim}
+
+\section{Development Status}
+
+Ragel is a relatively new tool and is under continuous development. As a rough
+release guide, minor revision number changes are for implementation
+improvements and feature additions. Major revision number changes are for
+implementation and language changes that do not preserve backwards
+compatibility. In the past, however, this has not always held true: changes that
+break code have crept into minor version number changes. Typically, the
+documentation lags behind the development in the interest of documenting only
+the lasting features. The latest changes are always documented in the ChangeLog
+file. As Ragel stabilizes, which is expected in the 5.x line, the version
+numbering rules will become more strict and the documentation will become more
+plentiful.
+
+
+\chapter{Constructing State Machines}
+
+\section{Ragel State Machine Specifications}
+
+A Ragel input file consists of a host language code file with embedded machine
+specifications. Ragel normally passes input straight to output. When it sees
+a machine specification it stops to read the Ragel statements and possibly generate
+code in place of the specification.
+Afterwards it continues to pass input through. There
+can be any number of FSM specifications in an input file. A multi-line FSM spec
+starts with \verb|%%{| and ends with \verb|}%%|. A single-line FSM spec starts
+with \verb|%%| and ends at the first newline.
+
+While Ragel is looking for FSM specifications it does basic lexical analysis on
+the surrounding input. It interprets literal strings and comments so a
+\verb|%%| sequence in either of those will not trigger the parsing of an FSM
+specification. Ragel does not pass the input through any preprocessor nor does it
+interpret preprocessor directives itself so includes, defines and ifdef logic
+cannot be used to alter the parse of a Ragel input file. It is therefore not
+possible to use an \verb|#if 0| directive to comment out a machine as is
+commonly done in C code. As an alternative, a machine can be prevented from
+causing any generated output by commenting out the write statements.
+
+In Figure \ref{cmd-line-parsing}, a multi-line machine is used to define the
+machine and single line machines are used to trigger the writing of the machine
+data and execution code.
+
+\begin{figure}
+\begin{multicols}{2}
+\small
+\begin{verbatim}
+#include <string.h>
+#include <stdio.h>
+
+%%{
+ machine foo;
+ main :=
+ ( 'foo' | 'bar' )
+ 0 @{ res = 1; };
+}%%
+
+%% write data noerror nofinal;
+\end{verbatim}
+\columnbreak
+\begin{verbatim}
+int main( int argc, char **argv )
+{
+ int cs, res = 0;
+ if ( argc > 1 ) {
+ char *p = argv[1];
+ char *pe = p + strlen(p) + 1;
+ %% write init;
+ %% write exec;
+ }
+ printf("result = %i\n", res );
+ return 0;
+}
+\end{verbatim}
+\end{multicols}
+\caption{Parsing a command line argument.}
+\label{cmd-line-parsing}
+\end{figure}
+
+
+\subsection{Naming Ragel Blocks}
+
+\begin{verbatim}
+machine fsm_name;
+\end{verbatim}
+\verbspace
+
+The \verb|machine| statement gives the name of the FSM. If present in a
+specification, this statement must appear first. If a machine specification
+does not have a name then Ragel uses the previous specification name. If no
+previous specification name exists then this is an error. Because FSM
+specifications persist in memory, a machine's statements can be spread across
+multiple machine specifications. This allows one to break up a machine across
+several files or draw in statements that are common to multiple machines using
+the include statement.
+
+\subsection{Including Ragel Code}
+
+\begin{verbatim}
+include FsmName "inputfile.rl";
+\end{verbatim}
+\verbspace
+
+The \verb|include| statement can be used to draw in the statements of another FSM
+specification. Both the name and input file are optional, however at least one
+must be given. Without an FSM name, the given input file is searched for an FSM
+of the same name as the current specification. Without an input file the
+current file is searched for a machine of the given name. If both are present,
+the given input file is searched for a machine of the given name.
+
+\subsection{Machine Definition}
+\label{definition}
+
+\begin{verbatim}
+<name> = <expression>;
+\end{verbatim}
+\verbspace
+
+The machine definition statement associates an FSM expression with a name. Machine
+expressions assigned to names can later be referenced by other expressions. A
+definition statement on its own does not cause any states to be generated. It is simply a
+description of a machine to be used later. States are generated only when a definition is
+instantiated, which happens when a definition is referenced in an instantiated
+expression.
+
+\subsection{Machine Instantiation}
+\label{instantiation}
+
+\begin{verbatim}
+<name> := <expression>;
+\end{verbatim}
+\verbspace
+
+The machine instantiation statement generates a set of states representing an expression and
+associates a name with the entry point. Each instantiation generates a distinct
+set of states. At a very minimum the \verb|main| machine must be instantiated.
+Other machines may be instantiated and control passed to them by use of
+\verb|fcall|, \verb|fgoto| or \verb|fnext| statements.
+
+\begin{comment}
+\subsection{Write Statement}
+
+\begin{verbatim}
+write <component> [options];
+\end{verbatim}
+\verbspace
+
+The write statement is used to generate parts of the machine. There are four
+components that can be generated: the state machine's static data, the
+initialization code, the execution code and the EOF action execution code. The
+write statement is described in detail in Section \ref{write-statement}.
+\end{comment}
+
+\section{Lexical Analysis of an FSM Specification}
+\label{lexing}
+
+Within a machine specification the following lexical rules apply to the parse
+of the input.
+
+\begin{itemize}
+
+\item The \verb|#| symbol begins a comment that terminates at the next newline.
+
+\item The symbols \verb|""|, \verb|''|, \verb|//|, \verb|[]| behave as the
+delimiters of literal strings. With them, the following escape sequences are interpreted:
+
+\verb| \0 \a \b \t \n \v \f \r|
+
+A backslash at the end of a line joins the following line onto the current. A
+backslash preceding any other character removes special meaning. This applies
+to terminating characters and to special characters in regular expression
+literals. As an exception, regular expression literals do not support escape
+sequences as the operands of a range within a list. See the bullet on regular
+expressions in Section \ref{basic}.
+
+\item The symbols \verb|{}| delimit a block of host language code that will be
+embedded into the machine as an action. Within the block of host language
+code, basic lexical analysis of C/C++ comments and strings is done in order to
+correctly find the closing brace of the block. With the exception of FSM
+commands embedded in code blocks, the entire block is preserved as is for
+identical reproduction in the output code.
+
+\item The pattern \verb|[+-]?[0-9]+| denotes an integer in decimal format.
+Integers used for specifying machines may be negative only if the alphabet type
+is signed. Integers used for specifying priorities may be positive or negative.
+
+\item The pattern \verb|0x[0-9a-fA-F]+| denotes an integer in hexadecimal
+format.
+
+\item The keywords are \verb|access|, \verb|action|, \verb|alphtype|,
+\verb|getkey|, \verb|write|, \verb|machine| and \verb|include|.
+
+\item The pattern \verb|[a-zA-Z_][a-zA-Z_0-9]*| denotes an identifier.
+
+%\item The allowable symbols are:
+%
+%\verb/ ( ) ! ^ * ? + : -> - | & . , := = ; > @ $ % /\\
+%\verb| >/ $/ %/ </ @/ <>/ >! $! %! <! @! <>!|\\
+%\verb| >^ $^ %^ <^ @^ <>^ >~ $~ %~ <~ @~ <>~|\\
+%\verb| >* $* %* <* @* <>*|
+
+\item Any amount of whitespace may separate tokens.
+
+\end{itemize}
+
+%\section{Parse of an FSM Specification}
+
+%The following statements are possible within an FSM specification. The
+%requirements for trailing semicolons loosely follow that of C.
+%A block
+%specifying code does not require a trailing semicolon. An expression
+%statement does require a trailing semicolon.
+
+
+\section{Basic Machines}
+\label{basic}
+
+The basic machines are the base operands of regular language expressions. They
+are the smallest unit to which machine construction and manipulation operators
+can be applied.
+
+In the diagrams that follow the symbol \verb|df| represents
+the default transition, which is taken if no other transition can be taken. The
+symbol \verb|cr| represents the carriage return character, \verb|nl| represents the newline character (aka line feed) and the symbol
+\verb|sp| represents the space character.
+
+\begin{itemize}
+
+\item \verb|'hello'| -- Concatenation Literal. Produces a machine that matches
+the sequence of characters in the quoted string. If there are 5 characters
+there will be 6 states chained together with the characters in the string. See
+Section \ref{lexing} for information on valid escape sequences.
+
+\begin{center}
+\includegraphics{bmconcat}
+\end{center}
+
+It is possible
+to make a concatenation literal case-insensitive by appending an \verb|i| to
+the string, for example \verb|'cmd'i|.
+
+\item \verb|"hello"| -- Identical to the single quoted version.
+
+\item \verb|[hello]| -- Or Expression. Produces a union of characters. There
+will be two states with a transition for each unique character between the two states.
+The \verb|[]| delimiters behave like the quotes of a literal string. For example,
+\verb|[ \t]| means tab or space. The or expression supports character ranges
+with the \verb|-| symbol as a separator. The meaning of the union can be negated
+using an initial \verb|^| character as in standard regular expressions.
+See Section \ref{lexing} for information on valid escape sequences
+in or expressions.
+
+\begin{center}
+\includegraphics{bmor}
+\end{center}
+
+\item \verb|''|, \verb|""|, and \verb|[]| -- Zero Length Machine. Produces a machine
+that matches the zero length string. Zero length machines have one state that is both
+a start state and a final state.
+
+\begin{center}
+\includegraphics{bmnull}
+\end{center}
+
+\item \verb|number| -- Simple Machine. Produces a two state machine with one
+transition on the given number. The number may be in decimal or hexadecimal
+format and should be in the range allowed by the alphabet type. The minimum and
+maximum values permitted are defined by the host machine that Ragel is compiled
+on. For example, numbers in a \verb|short| alphabet on an i386 machine should
+be in the range \verb|-32768| to \verb|32767|.
+
+\begin{center}
+\includegraphics{bmnum}
+\end{center}
+
+\item \verb|/simple_regex/| -- Regular Expression. Regular expressions are
+parsed as a series of expressions that will be concatenated together. Each
+concatenated expression
+may be a literal character, the any character specified by the \verb|.|
+symbol, or a union of characters specified by the \verb|[]| delimiters. If the
+first character of a union is \verb|^| then it matches any character not in the
+list. Within a union, a range of characters can be given by separating the first
+and last characters of the range with the \verb|-| symbol. Each
+concatenated machine may have repetition specified by following it with the
+\verb|*| symbol. The standard escape sequences described in Section
+\ref{lexing} are supported everywhere in regular expressions except as the
+operands of a range within a list. This notation also supports the \verb|i|
+trailing option. Use it to produce case-insensitive machines, as in \verb|/GET/i|.
+
+Ragel does not support very complex regular expressions because the desired
+results can always be achieved using the more general machine construction
+operators listed in Section \ref{machconst}. The following diagram shows the
+result of compiling \verb|/ab*[c-z].*[123]/|.
+
+\begin{center}
+\includegraphics{bmregex}
+\end{center}
+
+\item \verb|lit .. lit| -- Range. Produces a machine that matches any
+characters in the specified range. Allowable upper and lower bounds of the
+range are concatenation literals of length one and number literals. For
+example, \verb|0x10..0x20|, \verb|0..63|, and \verb|'a'..'z'| are valid ranges.
+The bounds should be in the range allowed by the alphabet type.
+
+\begin{center}
+\includegraphics{bmrange}
+\end{center}
+
+\item \verb|variable_name| -- Lookup the machine definition assigned to the
+variable name given and use an instance of it. See Section \ref{definition} for
+an important note on what it means to reference a variable name.
+
+\item \verb|builtin_machine| -- There are several built-in machines available
+for use. They are all two state machines for the purpose of matching common
+classes of characters. They are:
+
+\begin{itemize}
+
+\item \verb|any | -- Any character in the alphabet.
+
+\item \verb|ascii | -- Ascii characters. \verb|0..127|
+
+\item \verb|extend| -- Ascii extended characters. This is the range
+\verb|-128..127| for signed alphabets and the range \verb|0..255| for unsigned
+alphabets.
+
+\item \verb|alpha | -- Alphabetic characters. \verb|[A-Za-z]|
+
+\item \verb|digit | -- Digits. \verb|[0-9]|
+
+\item \verb|alnum | -- Alpha numerics. \verb|[0-9A-Za-z]|
+
+\item \verb|lower | -- Lowercase characters. \verb|[a-z]|
+
+\item \verb|upper | -- Uppercase characters. \verb|[A-Z]|
+
+\item \verb|xdigit| -- Hexadecimal digits. \verb|[0-9A-Fa-f]|
+
+\item \verb|cntrl | -- Control characters. \verb|0..31|
+
+\item \verb|graph | -- Graphical characters. \verb|[!-~]|
+
+\item \verb|print | -- Printable characters. \verb|[ -~]|
+
+\item \verb|punct | -- Punctuation. Graphical characters that are not alphanumerics.
+\verb|[!-/:-@[-`{-~]|
+
+\item \verb|space | -- Whitespace. \verb|[\t\v\f\n\r ]|
+
+\item \verb|zlen | -- Zero length string. \verb|""|
+
+\item \verb|empty | -- Empty set. Matches nothing. \verb|^any|
+
+\end{itemize}
+\end{itemize}
+
+\section{Operator Precedence}
+The following table shows operator precedence from lowest to highest. Operators
+in the same precedence group are evaluated from left to right.
+
+\verbspace
+\begin{tabular}{|c|c|c|}
+\hline
+1&\verb| , |&Join\\
+\hline
+2&\verb/ | & - --/&Union, Intersection and Subtraction\\
+\hline
+3&\verb| . <: :> :>> |&Concatenation\\
+\hline
+4&\verb| : |&Label\\
+\hline
+5&\verb| -> |&Epsilon Transition\\
+\hline
+&\verb| > @ $ % |&Transition Actions and Priorities\\
+\cline{2-3}
+&\verb| >/ $/ %/ </ @/ <>/ |&EOF Actions\\
+\cline{2-3}
+6&\verb| >! $! %! <! @! <>! |&Global Error Actions\\
+\cline{2-3}
+&\verb| >^ $^ %^ <^ @^ <>^ |&Local Error Actions\\
+\cline{2-3}
+&\verb| >~ $~ %~ <~ @~ <>~ |&To-State Actions\\
+\cline{2-3}
+&\verb| >* $* %* <* @* <>* |&From-State Actions\\
+\hline
+7&\verb| * ** ? + {n} {,n} {n,} {n,m} |&Repetition\\
+\hline
+8&\verb| ! ^ |&Negation and Character-Level Negation\\
+\hline
+9&\verb| ( <expr> ) |&Grouping\\
+\hline
+\end{tabular}
+
+\section{Regular Language Operators}
+\label{machconst}
+
+When using Ragel it is helpful to have a sense of how it constructs machines.
+Sometimes the determinization process can cause results that appear unusual to someone
+unfamiliar with it. Ragel does not make use of any nondeterministic
+intermediate state machines. All operators accept and return deterministic
+machines. However, to ease the discussion, the operations are defined in terms
+of epsilon transitions.
+
+To draw an epsilon transition between two states \verb|x| and \verb|y|, is to
+copy all of the properties of \verb|y| into \verb|x|. This involves drawing in
+all of \verb|y|'s to-state actions, EOF actions, etc., as well as its
+transitions. If \verb|x| and \verb|y| both have a transition out on the same
+character, then the transitions must be combined. During transition
+combination a new transition is made which goes to a new state that is the
+combination of both target states. The new combination state is created using
+the same epsilon transition method. The new state has an epsilon transition
+drawn to all the states that compose it. Since every time an epsilon transition
+is drawn the creation of new epsilon transitions may be triggered, the process
+of drawing epsilon transitions is repeated until there are no more epsilon
+transitions to be made.
+
+A very common error that is made when using Ragel is to make machines that do
+too much at once. That is, to create machines that have unintentional
+nondeterminism. This usually results from being unaware of the common strings
+between machines that are combined together using the regular language
+operators. This can involve never leaving a machine, causing its actions to be
+propagated through all the following states. Or it can involve an alternation
+where both branches are unintentionally taken simultaneously.
+
+This problem forces one to think hard about the language that needs to be
+matched. To guard against this kind of problem one must ensure that the machine
+specification is divided up using boundaries that do not allow ambiguities from
+one portion of the machine to the next. See Chapter
+\ref{controlling-nondeterminism} for more on this problem and how to solve it.
+
+The Graphviz tool is an immense help when debugging improperly compiled
+machines or otherwise learning how to use Ragel. In many cases, practical
+parsing programs will be too large to completely visualize with Graphviz. The
+proper approach is to reduce the language to the smallest subset possible that
+still exhibits the characteristics that one wishes to learn about or to fix.
+This can be done without modifying the source code using the \verb|-M| and
+\verb|-S| options at the frontend. If a machine cannot be easily reduced,
+embeddings of unique actions can be very useful for tracing a
+particular component of a larger machine specification, since action names are
+written out on transition labels.
+
+\subsection{Union}
+
+\verb/expr | expr/
+\verbspace
+
+The union operation produces a machine that matches any string in machine one
+or machine two. The operation first creates a new start state. Epsilon
+transitions are drawn from the new start state to the start states of both
+input machines. The resulting machine has a final state set equivalent to the
+union of the final state sets of both input machines. In this operation, there
+is the opportunity for nondeterminism among both branches. If there are
+strings, or prefixes of strings that are matched by both machines then the new
+machine will follow both parts of the alternation at once. The union operation is
+shown below.
+
+\graphspace
+\begin{center}
+\includegraphics{opor}
+\end{center}
+\graphspace
+
+The following example demonstrates the union of three machines representing
+common tokens.
+
+\verbspace
+\begin{verbatim}
+# Hex digits, decimal digits, or identifiers
+main := '0x' xdigit+ | digit+ | alpha alnum*;
+\end{verbatim}
+\verbspace
+
+\graphspace
+\begin{center}
+\includegraphics{exor}
+\end{center}
+
+\subsection{Intersection}
+
+\verb|expr & expr|
+\verbspace
+
+Intersection produces a machine that matches any
+string which is in both machine one and machine two. To achieve intersection, a
+union is performed on the two machines. After the result has been made
+deterministic, any final state that is not a combination of final states from
+both machines has its final state status revoked. To complete the operation,
+paths that do not lead to a final state are pruned from the machine. Therefore,
+if there are any such paths in either of the expressions they will be removed
+by the intersection operator. Intersection can be used to require that two
+independent patterns be simultaneously satisfied as in the following example.
+
+\verbspace
+\begin{verbatim}
+# Match lines four characters wide that contain
+# words separated by whitespace.
+main :=
+ /[^\n][^\n][^\n][^\n]\n/* &
+ (/[a-z][a-z]*/ | [ \n])**;
+\end{verbatim}
+\verbspace
+
+\graphspace
+\begin{center}
+\includegraphics{exinter}
+\end{center}
+
+\subsection{Difference}
+
+\verb|expr - expr|
+\verbspace
+
+The difference operation produces a machine that matches
+strings which are in machine one but which are not in machine two. To achieve subtraction,
+a union is performed on the two machines. After the result has been made
+deterministic, any final state that came from machine two or is a combination
+of states involving a final state from machine two has its final state status
+revoked. As with intersection, the operation is completed by pruning any path
+that does not lead to a final state. The following example demonstrates the
+use of subtraction to exclude specific cases from a set.
+
+\verbspace
+\begin{verbatim}
+# Subtract keywords from identifiers.
+main := /[a-z][a-z]*/ - ( 'for' | 'int' );
+\end{verbatim}
+\verbspace
+
+\graphspace
+\begin{center}
+\includegraphics{exsubtr}
+\end{center}
+\graphspace
+
+\subsection{Strong Difference}
+\label{strong_difference}
+
+\verb|expr -- expr|
+\verbspace
+
+Strong difference produces a machine that matches any string of the first
+machine which does not have any string of the second machine as a substring. In
+the following example, strong subtraction is used to exclude \verb|CRLF| from
+a sequence.
+
+\verbspace
+\begin{verbatim}
+crlf = '\r\n';
+main := [a-z]+ ':' ( any* -- crlf ) crlf;
+\end{verbatim}
+\verbspace
+
+\begin{center}
+\includegraphics{exstrongsubtr}
+\end{center}
+\graphspace
+
+This operator is equivalent to the following.
+
+\verbspace
+\begin{verbatim}
+expr - ( any* expr any* )
+\end{verbatim}
+
+\subsection{Concatenation}
+
+\verb|expr . expr|
+\verbspace
+
+Concatenation produces a machine that matches all the strings in machine one followed by all
+the strings in machine two. Concatenation draws epsilon transitions from the
+final states of the first machine to the start state of the second machine. The
+final states of the first machine lose their final state status, unless the
+start state of the second machine is final as well.
+Concatenation is the default operator. Two machines next to each other with no
+operator between them results in the machines being concatenated together.
+
+\graphspace
+\begin{center}
+\includegraphics{opconcat}
+\end{center}
+\graphspace
+
+The opportunity for nondeterministic behaviour results from the possibility of
+the final states of the first machine accepting a string which is also accepted
+by the start state of the second machine.
+The most common scenario that this happens in is the
+concatenation of a machine that repeats some pattern with a machine that gives
+a termination string, but the repetition machine does not exclude the
+termination string. The example in Section \ref{strong_difference}
+guards against this. Another example is the expression \verb|("'" any* "'")|.
+When executed the thread of control will
+never leave the \verb|any*| machine. This is a problem especially if actions
+are embedded to process the characters of the \verb|any*| component.
+
+In the following example, the first machine is always active due to the
+nondeterministic nature of concatenation. This particular nondeterminism is intended
+however because we wish to permit EOF strings before the end of the input.
+
+\verbspace
+\begin{verbatim}
+# Require an eof marker on the last line.
+main := /[^\n]*\n/* . 'EOF\n';
+\end{verbatim}
+\verbspace
+
+\begin{center}
+\includegraphics{exconcat}
+\end{center}
+\graphspace
+
+\noindent {\bf Note:} There is a language
+ambiguity involving concatenation and subtraction. Because concatenation is the
+default operator for two
+adjacent machines there is an ambiguity between subtraction of
+a positive numerical literal and concatenation of a negative numerical literal.
+For example, \verb|(x-7)| could be interpreted as \verb|(x . -7)| or
+\verb|(x - 7)|. In the Ragel language, the subtraction operator always takes precedence
+over concatenation of a negative literal. Precedence was given to the
+subtraction-based interpretation so as to adhere to the rule that the default
+concatenation operator takes effect only when there are no other operators between
+two machines. Beware of writing machines such as \verb|(any -1)| when what is
+desired is a concatenation of \verb|any| and -1. Instead write
+\verb|(any . -1)| or \verb|(any (-1))|. If in doubt of the meaning of your program do not
+rely on the default concatenation operator, always use the \verb|.| symbol.
+
+
+\subsection{Kleene Star}
+
+\verb|expr*|
+\verbspace
+
+The machine resulting from the Kleene Star operator will match zero or more
+repetitions of the machine it is applied to.
+It creates a new start state and an additional final
+state. Epsilon transitions are drawn between the new start state and the old start
+state, between the new start state and the new final state, and
+between the final states of the machine and the new start state. After the
+machine is made deterministic the effect is of the final states getting all the
+transitions of the start state.
+
+\graphspace
+\begin{center}
+\includegraphics{opstar}
+\end{center}
+\graphspace
+
+The possibility for nondeterministic behaviour arises if the final states have
+transitions on any of the same characters as the start state. This is common
+when applying kleene star to an alternation of tokens. Like the other problems
+arising from nondeterministic behavior, this is discussed in more detail in Chapter
+\ref{controlling-nondeterminism}. This particular problem can also be solved
+by using the longest-match construction discussed in Section
+\ref{generating-scanners} on scanners.
+
+In this simple
+example, there is no nondeterminism introduced by the exterior kleene star due
+to the newline at the end of the regular expression. Without the newline the
+exterior kleene star would be redundant and there would be ambiguity between
+repeating the inner range of the regular expression and the entire regular
+expression. Though it would not cause a problem in this case, unnecessary
+nondeterminism in the kleene star operator often causes undesired results for
+new Ragel users and must be guarded against.
+
+\verbspace
+\begin{verbatim}
+# Match any number of lines with only lowercase letters.
+main := /[a-z]*\n/*;
+\end{verbatim}
+\verbspace
+
+\begin{center}
+\includegraphics{exstar}
+\end{center}
+
+\subsection{One Or More Repetition}
+
+\verb|expr+|
+\verbspace
+
+This operator produces the concatenation of the machine with the kleene star of
+itself. The result will match one or more repetitions of the machine. The plus
+operator is equivalent to \verb|(expr . expr*)|. The plus operator makes
+repetitions that cannot be zero length.
+
+\verbspace
+\begin{verbatim}
+# Match alpha-numeric words.
+main := alnum+;
+\end{verbatim}
+\verbspace
+
+\begin{center}
+\includegraphics{explus}
+\end{center}
+
+\subsection{Optional}
+
+\verb|expr?|
+\verbspace
+
+The {\em optional} operator produces a machine that accepts the machine
+given or the zero length string. The optional operator is equivalent to
+\verb/(expr | '' )/. In the following example the optional operator is used to
+extend a token.
+
+\verbspace
+\begin{verbatim}
+# Match integers or floats.
+main := digit+ ('.' digit+)?;
+\end{verbatim}
+\verbspace
+
+\begin{center}
+\includegraphics{exoption}
+\end{center}
+
+\subsection{Repetition}
+
+\begin{tabbing}
+\noindent \verb|expr {n}| \hspace{16pt}\=-- Exactly N copies of expr.\\
+
+\noindent \verb|expr {,n}| \>-- Zero to N copies of expr.\\
+
+\noindent \verb|expr {n,}| \>-- N or more copies of expr.\\
+
+\noindent \verb|expr {n,m}| \>-- N to M copies of expr.
+\end{tabbing}
+
+\subsection{Negation}
+
+\verb|!expr|
+\verbspace
+
+Negation produces a machine that matches any string not matched by the given
+machine. Negation is equivalent to \verb|(any* - expr)|.
+
+\verbspace
+\begin{verbatim}
+# Accept anything but a string beginning with a digit.
+main := ! ( digit any* );
+\end{verbatim}
+\verbspace
+
+\begin{center}
+\includegraphics{exnegate}
+\end{center}
+
+\subsection{Character-Level Negation}
+
+\verb|^expr|
+\verbspace
+
+Character-level negation produces a machine that matches any single character
+not matched by the given machine. Character-Level Negation is equivalent to
+\verb|(any - expr)|.
+
+\section{State Charts}
+
+It is not uncommon for programmers to implement
+parsers as manually-coded state machines, either using a switch statement or a
+state map compiler which takes a list of states, transitions and actions, and
+generates code.
+
+This method can be a very effective programming technique for producing robust
+code. The key disadvantage becomes clear when one attempts to comprehend such a
+parser. Machines coded in this way usually require many lines, causing logic to
+be spread out over large distances in the source file. Remembering the function
+of a large number of states can be difficult and organizing the parser in a
+sensible way requires discipline because branches and repetition present many
+file layout options. This kind of programming takes a specification with
+inherent structure such as looping, alternation and concatenation and expresses
+it in a flat form.
+
+If we could take an isolated component of a manually programmed state chart,
+that is, a subset of states that has only one entry point, and implement it
+using regular language operators then we could eliminate all the explicit
+naming of the states contained in it. By eliminating explicitly named states
+and replacing them with higher-level specifications we simplify a parser
+specification.
+
+For example, sometimes chains of states are needed, with only a small number of
+possible characters appearing along the chain. These can easily be replaced
+with a concatenation of characters. Sometimes a group of common states
+implement a loop back to another single portion of the machine. Rather than
+manually duplicate all the transitions that loop back, we may be able to
+express the loop using a kleene star operator.
+
+Ragel allows one to take this state map simplification approach. We can build
+state machines using a state map model and implement portions of the state map
+using regular languages. In place of any transition in the state machine,
+entire sub-state machines can be given. These can encapsulate functionality
+defined elsewhere. An important aspect of the Ragel approach is that when we
+wrap up a collection of states using a regular expression we do not lose
+access to the states and transitions. We can still execute code on the
+transitions that we have encapsulated.
+
+\subsection{Join}
+
+\verb|expr , expr , ...|
+\verbspace
+
+Join a list of machines together without
+drawing any transitions, without setting up a start state, and without
+designating any final states. Transitions between the machines may be specified
+using labels and epsilon transitions. The start state must be explicitly
+specified with the ``start'' label. Final states may be specified with an
+epsilon transition to the implicitly created ``final'' state. The join
+operation allows one to build machines using a state chart model.
+
+\subsection{Label}
+
+\verb|label: expr|
+\verbspace
+
+Attaches a label to an expression. Labels can be
+used as the target of epsilon transitions and explicit control transfer
+statements such as \verb|fgoto| and \verb|fnext| in action
+code.
+
+\subsection{Epsilon}
+
+\verb|expr -> label|
+\verbspace
+
+Draws an epsilon transition to the state defined
+by \verb|label|. Epsilon transitions are made deterministic when join
+operators are evaluated. Epsilon transitions that are not in a join operation
+are made deterministic when the machine definition that contains the epsilon is
+complete. See Section \ref{labels} for information on referencing labels.
+
+
+\section{Scanners}
+\label{generating-scanners}
+
+The longest-match operator can be used to construct scanners. The generated
+machine repeatedly attempts to match one of the given patterns, first favouring
+longer pattern matches over shorter ones. If there is a choice between equal
+length matches, the match of the pattern which appears first is chosen.
+
+\verbspace
+\begin{verbatim}
+<machine_name> := |*
+ pattern1 => action1;
+ pattern2 => action2;
+ ...
+ *|;
+\end{verbatim}
+\verbspace
+
+The longest-match construction operator is not a pure state machine operator.
+It relies on the \verb|tokstart|, \verb|tokend| and \verb|act| variables to be
+present so that it can backtrack and make pointers to the matched text
+available to the user. If input is processed using multiple calls to the
+execute code then the user must ensure that, when a token is only partially
+matched, the prefix is preserved on the subsequent invocation of the
+execute code.
+
+The \verb|tokstart| variable must be defined as a pointer to the input data.
+It is used for recording where the current token match begins. This variable
+may be used in action code for retrieving the text of the current match. Ragel
+ensures that, in between tokens and outside of the longest-match machines,
+this pointer is set to null. In between calls to the execute code the user must
+check if \verb|tokstart| is set and if so, ensure that the data it points to is
+preserved ahead of the next buffer block. This is described in more detail
+below.
+
+The \verb|tokend| variable must also be defined as a pointer to the input data.
+It is used for recording where a match ends and where scanning of the next
+token should begin. This can also be used in action code for retrieving the
+text of the current match.
+
+The \verb|act| variable must be defined as an integer type. It is used for
+recording the identity of the last pattern matched when the scanner must go
+past a matched pattern in an attempt to make a longer match. If the longer
+match fails it may need to consult the act variable. In some cases use of the act
+variable can be avoided because the value of the current state is enough
+information to determine which token to accept, however in other cases this is
+not enough and so the \verb|act| variable is used.
+
+When the longest-match operator is in use, the user's driver code must take on
+some buffer management functions. The following algorithm gives an overview of
+the steps that should be taken to properly use the longest-match operator.
+
+\begin{itemize}
+\setlength{\parskip}{0pt}
+\item Read a block of input data.
+\item Run the execute code.
+\item If \verb|tokstart| is set, the execute code will expect the incomplete
+token to be preserved ahead of the buffer on the next invocation of the execute
+code.
+\begin{itemize}
+\item Shift the data beginning at \verb|tokstart| and ending at \verb|pe| to the
+beginning of the input buffer.
+\item Reset \verb|tokstart| to the beginning of the buffer.
+\item Shift \verb|tokend| by the distance from the old value of \verb|tokstart|
+to the new value. The \verb|tokend| variable may or may not be valid. There is
+no way to know if it holds a meaningful value because it is not kept at null
+when it is not in use. It can be shifted regardless.
+\end{itemize}
+\item Read another block of data into the buffer, immediately following any
+preserved data.
+\item Run the scanner on the new data.
+\end{itemize}
+
+Figure \ref{preserve_example} shows the required handling of an input stream in
+which a token is broken by the input block boundaries. After processing up to
+and including the ``t'' of ``characters'', the prefix of the string token must be
+retained and processing should resume at the ``e'' on the next iteration of
+the execute code.
+
+If one uses a large input buffer for collecting input then the number of times
+the shifting must be done will be small. Furthermore, if one takes care not to
+define tokens that are allowed to be very long and instead processes these
+items using pure state machines or sub-scanners, then only a small amount of
+data will ever need to be shifted.
+
+\begin{figure}
+\begin{verbatim}
+ a) A stream "of characters" to be scanned.
+ | | |
+ p tokstart pe
+
+ b) "of characters" to be scanned.
+ | | |
+ tokstart p pe
+\end{verbatim}
+\caption{Following an invocation of the execute code there may be a partially
+matched token (a). The data of the partially matched token
+must be preserved ahead of the new data on the next invocation (b).}
+\label{preserve_example}
+\end{figure}
+
+Since scanners attempt to make the longest possible match of input, in some
+cases they are not able to identify a token upon parsing its final character;
+they must wait for a lookahead character. For example if trying to match words,
+the token match must be triggered on following whitespace in case more
+characters of the word have yet to come. The user must therefore arrange for an
+EOF character to be sent to the scanner to flush out any token that has not yet
+been matched. The user can exclude a single character from the entire scanner
+and use this character as the EOF character, possibly specifying an EOF action.
+For most scanners, zero is a suitable choice for the EOF character.
+
+Alternatively, if whitespace is not significant and ignored by the scanner, the
+final real token can be flushed out by simply sending an additional whitespace
+character on the end of the stream. If the real stream ends with whitespace
+then it will simply be extended and ignored. If it does not, then the last real token is
+guaranteed to be flushed and the dummy EOF whitespace ignored.
+An example scanner processing loop is given in Figure \ref{scanner-loop}.
+
+\begin{figure}
+\small
+\begin{verbatim}
+ int have = 0;
+ bool done = false;
+ while ( !done ) {
+ /* How much space is in the buffer? */
+ int space = BUFSIZE - have;
+ if ( space == 0 ) {
+ /* Buffer is full. */
+ cerr << "TOKEN TOO BIG" << endl;
+ exit(1);
+ }
+
+ /* Read in a block after any data we already have. */
+ char *p = inbuf + have;
+ cin.read( p, space );
+ int len = cin.gcount();
+
+ /* If no data was read, send the EOF character. */
+ if ( len == 0 ) {
+ p[0] = 0, len++;
+ done = true;
+ }
+
+ char *pe = p + len;
+ %% write exec;
+
+ if ( cs == RagelScan_error ) {
+ /* Machine failed before finding a token. */
+ cerr << "PARSE ERROR" << endl;
+ exit(1);
+ }
+
+ if ( tokstart == 0 )
+ have = 0;
+ else {
+ /* There is a prefix to preserve, shift it over. */
+ have = pe - tokstart;
+ memmove( inbuf, tokstart, have );
+ tokend = inbuf + (tokend-tokstart);
+ tokstart = inbuf;
+ }
+ }
+\end{verbatim}
+\caption{A processing loop for a scanner.}
+\label{scanner-loop}
+\end{figure}
+
+
+\section{Write Statement}
+\label{write-statement}
+
+\begin{verbatim}
+write <component> [options];
+\end{verbatim}
+\verbspace
+
+
+The write statement is used to generate parts of the machine.
+There are four
+components that can be generated by a write statement. These components are the
+state machine's data, initialization code, execution code and EOF action
+execution code. A write statement may appear before a machine is fully defined.
+This allows one to write out the data first then later define the machine where
+it is used. An example of this is shown in Figure \ref{fbreak-example}.
+
+\subsection{Write Data}
+\begin{verbatim}
+write data [options];
+\end{verbatim}
+\verbspace
+
+The write data statement causes Ragel to emit the constant static data needed
+by the machine. In table-driven output styles (see Section \ref{genout}) this
+is a collection of arrays that represent the states and transitions of the
+machine. In goto-driven machines much less data is emitted. At the very
+minimum a start state \verb|name_start| is generated. All variables written
+out in machine data have both the \verb|static| and \verb|const| properties and
+are prefixed with the name of the machine and an
+underscore. The data can be placed inside a class, inside a function, or it can
+be defined as global data.
+
+Two variables are written that may be used to test the state of the machine
+after a buffer block has been processed. The \verb|name_error| variable gives
+the id of the state that the machine moves into when it cannot find a valid
+transition to take. The machine immediately breaks out of the processing loop when
+it finds itself in the error state. The error variable can be compared to the
+current state to determine if the machine has failed to parse the input. If the
+machine is complete, that is from every state there is a transition to a proper
+state on every possible character of the alphabet, then no error state is required
+and this variable will be set to -1.
+
+The \verb|name_first_final| variable stores the id of the first final state. All of the
+machine's states are sorted by their final state status before having their ids
+assigned. Checking if the machine has accepted its input can then be done by
+checking if the current state is greater-than or equal to the first final
+state.
+
+Data generation has several options:
+
+\begin{itemize}
+\item \verb|noerror| - Do not generate the integer variable that gives the
+id of the error state.
+\item \verb|nofinal| - Do not generate the integer variable that gives the
+id of the first final state.
+\item \verb|noprefix| - Do not prefix the variable names with the name of the
+machine.
+\end{itemize}
+
+\subsection{Write Init}
+\begin{verbatim}
+write init;
+\end{verbatim}
+\verbspace
+
+The write init statement causes Ragel to emit initialization code. This should
+be executed once before the machine is started. At a very minimum this sets the
+current state to the start state. If other variables are needed by the
+generated code, such as call
+stack variables or longest-match management variables, they are also
+initialized here.
+
+\subsection{Write Exec}
+\begin{verbatim}
+write exec [options];
+\end{verbatim}
+\verbspace
+
+The write exec statement causes Ragel to emit the state machine's execution code.
+Ragel expects several variables to be available to this code. At a very minimum, the
+generated code needs access to the current character position \verb|p|, the ending
+position \verb|pe| and the current state \verb|cs|, though \verb|pe|
+can be excluded by specifying the \verb|noend| write option.
+The \verb|p| variable is the cursor that the execute code will
+use to traverse the input. The \verb|pe| variable should be set up to point to one
+position past the last valid character in the buffer.
+
+Other variables are needed when certain features are used. For example using
+the \verb|fcall| or \verb|fret| statements requires \verb|stack| and
+\verb|top| variables to be defined. If a longest-match construction is used,
+variables for managing backtracking are required.
+
+The write exec statement has one option. The \verb|noend| option tells Ragel
+to generate code that ignores the end position \verb|pe|. In this
+case the user must explicitly break out of the processing loop using
+\verb|fbreak|, otherwise the machine will continue to process characters until
+it moves into the error state. This option is useful if one wishes to process a
+null terminated string. Rather than traverse the string to discover the length
+before processing the input, the user can break out when the null character is
+seen. The example in Figure \ref{fbreak-example} shows the use of the
+\verb|noend| write option and the \verb|fbreak| statement for processing a string.
+
+\begin{figure}
+\small
+\begin{verbatim}
+#include <stdio.h>
+%% machine foo;
+int main( int argc, char **argv )
+{
+ %% write data noerror nofinal;
+ int cs, res = 0;
+ if ( argc > 1 ) {
+ char *p = argv[1];
+ %%{
+ main :=
+ [a-z]+
+ 0 @{ res = 1; fbreak; };
+ write init;
+ write exec noend;
+ }%%
+ }
+ printf("execute = %i\n", res );
+ return 0;
+}
+\end{verbatim}
+\caption{Use of {\tt noend} write option and the {\tt fbreak} statement for
+processing a string.}
+\label{fbreak-example}
+\end{figure}
+
+
+\subsection{Write EOF Actions}
+\begin{verbatim}
+write eof;
+\end{verbatim}
+\verbspace
+
+The write EOF statement causes Ragel to emit code that executes EOF actions.
+This write statement is only relevant if EOF actions have been embedded,
+otherwise it does not generate anything. The EOF action code requires access to
+the current state.
+
+\section{Referencing Names}
+\label{labels}
+
+This section describes how to reference names in epsilon transitions and
+action-based control-flow statements such as \verb|fgoto|. There is a hierarchy
+of names implied in a Ragel specification. At the top level are the machine
+instantiations. Beneath the instantiations are labels and references to machine
+definitions. Beneath those are more labels and references to definitions, and
+so on.
+
+Any name reference may contain multiple components separated with the \verb|::|
+compound symbol. The search for the first component of a name reference is
+rooted at the join expression that the epsilon transition or action embedding
+is contained in. If the name reference is not contained in a join,
+the search is rooted at the machine definition that the epsilon transition or
+action embedding is contained in. Each component after the first is searched
+for beginning at the location in the name tree that the previous reference
+component refers to.
+
+In the case of action-based references, if the action is embedded more than
+once, the local search is performed for each embedding and the result is the
+union of all the searches. If no result is found for action-based references then
+the search is repeated at the root of the name tree. Any action-based name
+search may be forced into a strictly global search by prefixing the name
+reference with \verb|::|.
+
+The final component of the name reference must resolve to a unique entry point.
+If a name is unique in the entire name tree it can be referenced as is. If it
+is not unique it can be specified by qualifying it with names above it in the
+name tree. However, it can always be renamed.
+
+% FIXME: Should fit this in somewhere.
+% Some kinds of name references are illegal. Cannot call into longest-match
+% machine, can only call its start state. Cannot make a call to anywhere from
+% any part of a longest-match machine except a rule's action. This would result
+% in an eventual return to some point inside a longest-match other than the
+% start state. This is banned for the same reason a call into the LM machine is
+% banned.
+
+\section{State Machine Minimization}
+
+State machine minimization is the process of finding the minimal equivalent FSM accepting
+the language. Minimization reduces the number of states in machines
+by merging equivalent states. It does not change the behaviour of the machine
+in any way. It will cause some states to be merged into one because they are
+functionally equivalent. State minimization is on by default. It can be turned
+off with the \verb|-n| option.
+
+The algorithm implemented is similar to Hopcroft's state minimization
+algorithm. Hopcroft's algorithm assumes a finite alphabet that can be listed in
+memory, whereas Ragel supports arbitrary integer alphabets that cannot be
+listed in memory. Though exact analysis is very difficult, Ragel minimization
+runs close to $O(n \times \log(n))$ and requires $O(n)$ temporary storage where
+$n$ is the number of states.
+
+\chapter{User Actions}
+
+\section{Embedding Actions}
+
+\begin{verbatim}
+action ActionName {
+ /* Code an action here. */
+ count += 1;
+}
+\end{verbatim}
+\verbspace
+
+The action statement defines a block of code that can be embedded into an FSM.
+Action names can be referenced by the action embedding operators in
+expressions. Though actions need not be named in this way (literal blocks
+of code can be embedded directly when building machines), defining reusable
+blocks of code whenever possible is good practice because it potentially increases the
+degree to which the machine can be minimized. Within an action some Ragel expressions
+and statements are parsed and translated. These allow the user to interact with the machine
+from action code. See Section \ref{vals} for a complete list of statements and
+values available in code blocks.
+
+\subsection{Entering Action}
+
+\verb|expr > action|
+\verbspace
+
+The entering operator embeds an action into the starting transitions. The
+action is executed on all transitions that enter into the machine from the
+start state. If the start state is a final state then it is possible for the
+machine to never be entered and the starting transitions bypassed. In the
+following example, the action is executed on the first transition of the
+machine. If the repetition machine is bypassed the action is not executed.
+
+\verbspace
+\begin{verbatim}
+# Execute A at the beginning of a string of alpha.
+main := ( lower* >A ) . ' ';
+\end{verbatim}
+
+\begin{center}
+\includegraphics{exstact}
+\end{center}
+
+\subsection{Finishing Action}
+
+\verb|expr @ action|
+\verbspace
+
+The finishing action operator embeds an action into any transitions that go into a
+final state. Whether or not the machine accepts is not determined at the point
+the action is executed. Further input may move the machine out of the accepting
+state, but keep it in the machine. As in the following example, the
+into-final-state operator is most often used when no lookahead is necessary.
+
+\verbspace
+\begin{verbatim}
+# Execute A when the trailing space is seen.
+main := ( lower* ' ' ) @A;
+\end{verbatim}
+\verbspace
+
+\begin{center}
+\includegraphics{exdoneact}
+\end{center}
+
+\subsection{All Transition Action}
+
+\verb|expr $ action|
+\verbspace
+
+The all transition operator embeds an action into all transitions of a machine.
+The action is executed whenever a transition of the machine is taken. In the
+following example, A is executed on every character matched.
+
+\verbspace
+\begin{verbatim}
+# Execute A on any characters of machine one or two.
+main := ( 'm1' | 'm2' ) $A;
+\end{verbatim}
+\verbspace
+
+\begin{center}
+\includegraphics{exallact}
+\end{center}
+
+\subsection{Pending Out Actions}
+\label{out-actions}
+
+\verb|expr % action|
+\verbspace
+
+The pending out action operator embeds an action into the pending out
+transitions of a machine. The action is first embedded into the final states of
+the machine and later transferred to any transitions made going out of the
+machine. The transfer can be caused either by a concatenation or kleene star
+operation. This mechanism allows one to associate an action with the
+termination of a sequence, without being concerned about what particular
+character terminates the sequence. In the following example, A is executed
+when leaving the alpha machine by the newline character.
+
+\verbspace
+\begin{verbatim}
+# Match a word followed by a newline. Execute A when
+# finishing the word.
+main := ( lower+ %A ) . '\n';
+\end{verbatim}
+\verbspace
+
+\begin{center}
+\includegraphics{exfinact}
+\end{center}
+\graphspace
+
+
+In the following example, the \verb|term_word| action could be used to register
+the appearance of a word and to clear the buffer that the \verb|lower| action used
+to store the text of it.
+
+\verbspace
+\begin{verbatim}
+word = ( [a-z] @lower )+ %term_word;
+main := word ( ' ' @space word )* '\n' @newline;
+\end{verbatim}
+\verbspace
+
+% FIXME: add
+%\begin{center}
+%\includegraphics[scale=0.4]{outact.ps}
+%\end{center}
+
+In this final example of the action embedding operators, A is executed upon
+entering the alpha machine, B is executed on all transitions of the alpha
+machine, C is executed when the alpha machine accepts by moving into the
+newline machine and N is executed when the newline machine moves into a final
+state.
+
+\verbspace
+\begin{verbatim}
+# Execute A on starting the alpha machine, B on every transition
+# moving through it and C upon finishing. Execute N on the newline.
+main := ( lower* >A $B %C ) . '\n' @N;
+\end{verbatim}
+\verbspace
+
+\begin{center}
+\includegraphics{exaction}
+\end{center}
+
+\section{State Action Embedding Operators}
+
+The state embedding operators allow one to embed actions into states. Like the
+transition embedding operators, there are several different classes of states
+that the operators access. The meanings of the symbols are partially related to
+the meanings of the symbols used by the transition embedding operators.
+
+The state embedding operators are different from the transition embedding
+operators in that there are various kinds of events that embedded actions can
+be associated with, requiring them to be distinguished by these different types
+of events. The state embedding operators have two components. The first, which
+is the first one or two characters, specifies the class of states that the
+action will be embedded into. The second component specifies the type of event
+the action will be executed on.
+
+\def\fakeitem{\hspace*{12pt}$\bullet$\hspace*{10pt}}
+
+\begin{minipage}{\textwidth}
+\begin{multicols}{2}
+\raggedcolumns
+\noindent The different classes of states are:\\
+\fakeitem \verb|> | -- the start state \\
+\fakeitem \verb|$ | -- all states\\
+\fakeitem \verb|% | -- final states\\
+\fakeitem \verb|< | -- any state except the start state\\
+\fakeitem \verb|@ | -- any state except final states\\
+\fakeitem \verb|<>| -- any except start and final (middle)
+
+\columnbreak
+
+\noindent The different kinds of embeddings are:\\
+\fakeitem \verb|~| -- to-state actions\\
+\fakeitem \verb|*| -- from-state actions\\
+\fakeitem \verb|/| -- EOF actions\\
+\fakeitem \verb|!| -- error actions\\
+\fakeitem \verb|^| -- local error actions\\
+\end{multicols}
+\end{minipage}
+%\label{state-act-embed}
+%\caption{The two components of state embedding operators. The class of states
+%to select comes first, followed by the type of embedding.}
+%
+%\begin{figure}[t]
+%\centering
+%\includegraphics{stembed}
+%\caption{Summary of state manipulation operators}
+%\label{state-act-embed-chart}
+%\end{figure}
+
+%\noindent Putting these two components together we get a matrix of state
+%embedding operators. The entire set is given in Figure \ref{state-act-embed-chart}.
+
+
+\subsection{To-State and From-State Actions}
+
+\subsubsection{To-State Actions}
+
+\verb| >~ $~ %~ <~ @~ <>~ |
+\verbspace
+
+To-state actions are executed whenever the state machine moves into the
+specified state, either by a natural movement over a transition or by an
+action-based transfer of control such as \verb|fgoto|. They are executed after the
+in-transition's actions but before the current character is advanced and
+tested against the end of the input block. To-state embeddings stay with the
+state. They are irrespective of the state's current set of transitions and any
+future transitions that may be added in or out of the state.
+
+Note that the setting of the current state variable \verb|cs| outside of the
+execute code is not considered by Ragel as moving into a state and consequently
+the to-state actions of the new current state are not executed. This includes
+the initialization of the current state when the machine begins. This is
+because the entry point into the machine execution code is after the execution
+of to-state actions.
+
+\subsubsection{From-State Actions}
+
+\verb| >* $* %* <* @* <>* |
+\verbspace
+
+From-state actions are executed whenever the state machine takes a transition from a
+state, either to itself or to some other state. These actions are executed
+immediately after the current character is tested against the input block end
+marker and before the transition to take is sought based on the current
+character. From-state actions are therefore executed even if a transition
+cannot be found and the machine moves into the error state. Like to-state
+embeddings, from-state embeddings stay with the state.
+
+\subsection{EOF Actions}
+
+\verb| >/ $/ %/ </ @/ <>/ |
+\verbspace
+
+The EOF action embedding operators enable the user to embed EOF actions into
+different classes of
+states. EOF actions are stored in states and generated with the \verb|write eof|
+statement. The generated EOF code switches on the current state and executes the EOF
+actions associated with it.
+
+\subsection{Handling Errors}
+
+\subsubsection{Global Error Actions}
+
+\verb| >! $! %! <! @! <>! |
+\verbspace
+
+Error actions are stored in states until the final state machine has been fully
+constructed. They are then transferred to the transitions that move into the
+error state. This transfer entails the creation of a transition from the state
+to the error state that is taken on all input characters which are not already
+covered by the state's transitions. In other words it provides a default
+action. Error actions can induce a recovery by altering \verb|p| and then jumping back
+into the machine with \verb|fgoto|.
+
+\subsubsection{Local Error Actions}
+
+\verb| >^ $^ %^ <^ @^ <>^ |
+\verbspace
+
+Like global error actions, local error actions are also stored in states until
+a transfer point. The transfer point is different however. Each local error action
+embedding is associated with a name. When a machine definition has been fully
+constructed, all local error action embeddings associated with the same name as the
+machine are transferred to error transitions. Local error actions can be used
+to specify an action to take when a particular section of a larger state
+machine fails to make a match. A particular machine definition's ``thread'' may
+die and the local error actions executed, however the machine as a whole may
+continue to match input.
+
+There are two forms of local error action embeddings. In the first form the name defaults
+to the current machine. In the second form the machine name can be specified. This
+is useful when it is more convenient to specify the local error action in a
+sub-definition that is used to construct the machine definition where the
+transfer should happen. To embed local error actions and explicitly state the
+machine on which the transfer is to happen use \verb|(name, action)| as the
+action.
+
+\begin{comment}
+\begin{itemize}
+\setlength{\parskip}{0in}
+\item \verb|expr >^ (name, action) | -- Start state.
+\item \verb|expr $^ (name, action) | -- All states.
+\item \verb|expr %^ (name, action) | -- Final states.
+\item \verb|expr <^ (name, action) | -- Not start state.
+\item \verb|expr <>^ (name, action)| -- Not start and not final states.
+\end{itemize}
+\end{comment}
+
+\section{Action Ordering and Duplicates}
+
+When building a parser by combining smaller expressions which themselves have
+embedded actions, it is often the case that transitions are made which need to
+execute a number of actions on one input character. For example when we leave
+an expression, we may execute the expression's pending out action and the
+subsequent expression's starting action on the same input character. We must
+therefore devise a method for ordering actions that is both intuitive and
+predictable for the user and repeatable by the state machine compiler. The
+determinization processes cannot simply order actions by the time at which they
+are introduced into a transition -- otherwise the programmer will be at the
+mercy of luck.
+
+We associate with the embedding of each action a distinct timestamp which is
+used to order actions that appear together on a single transition in the final
+compiled state machine. To accomplish this we traverse the parse tree of
+regular expressions and assign timestamps to action embeddings. This algorithm
+is recursive in nature and quite simple. When it visits a parse tree node it
+assigns timestamps to all {\em starting} action embeddings, recurses on the
+parse tree, then assigns timestamps to the remaining {\em all}, {\em
+finishing}, and {\em leaving} embeddings in the order in which they appear.
+
+Ragel does not permit actions (defined or unnamed) to appear multiple times in
+an action list. When the final machine has been created, actions which appear
+more than once in a single transition or EOF action list have their duplicates
+removed. The first appearance of the action is preserved. This is useful in a
+number of scenarios. First, it allows us to union machines with common
+prefixes without worrying about the action embeddings in the prefix being
+duplicated. Second, it prevents pending out actions from being transferred multiple times
+when a concatenation follows a kleene star and the two machines begin with a common
+character.
+
+\verbspace
+\begin{verbatim}
+word = [a-z]+ %act;
+main := word ( '\n' word )* '\n\n';
+\end{verbatim}
+
+\section{Values and Statements Available in Code Blocks}
+\label{vals}
+
+\noindent The following values are available in code blocks:
+
+\begin{itemize}
+\item \verb|fpc| -- A pointer to the current character. This is equivalent to
+accessing the \verb|p| variable.
+
+\item \verb|fc| -- The current character. This is equivalent to the expression \verb|(*p)|.
+
+\item \verb|fcurs| -- An integer value representing the current state. This
+value should only be read from. To move to a different place in the machine
+from action code use the \verb|fgoto|, \verb|fnext| or \verb|fcall| statements.
+Outside of the machine execution code the \verb|cs| variable may be modified.
+
+\item \verb|ftargs| -- An integer value representing the target state. This
+value should only be read from. Again, \verb|fgoto|, \verb|fnext| and
+\verb|fcall| can be used to move to a specific entry point.
+
+\item \verb|fentry(<label>)| -- Retrieve an integer value representing the
+entry point \verb|label|. The integer value returned will be a compile time
+constant. This number is suitable for later use in control flow transfer
+statements that take an expression. This value should not be compared against
+the current state because any given label can have multiple states representing
+it. The value returned by \verb|fentry| will be one of the possibly multiple states the
+label represents.
+\end{itemize}
+
+\noindent The following statements are available in code blocks:
+
+\begin{itemize}
+
+\item \verb|fhold;| -- Do not advance over the current character. If processing
+data in multiple buffer blocks, the \verb|fhold| statement should only be used
+once in the set of actions executed on a character. Multiple calls may result
+in backing up over the beginning of the buffer block. The \verb|fhold|
+statement does not imply any transfer of control. In actions embedded into
+transitions, it is equivalent to the \verb|p--;| statement. In scanner pattern
+actions any changes made to \verb|p| are lost. In this context, \verb|fhold| is
+equivalent to \verb|tokend--;|.
+
+\item \verb|fexec <expr>;| -- Set the next character to process. This can be
+used to backtrack to previous input or advance ahead.
+Unlike \verb|fhold|, which can be used
+anywhere, \verb|fexec| requires the user to ensure that the target of the
+backtrack is in the current buffer block or is known to be somewhere ahead of
+it. The machine will continue iterating forward until \verb|pe| is arrived at,
+\verb|fbreak| is called or the machine moves into the error state. In actions
+embedded into transitions, the \verb|fexec| statement is equivalent to setting
+\verb|p| to one position ahead of the next character to process. If the user
+also modifies \verb|pe|, it is possible to change the buffer block entirely.
+In scanner pattern actions any changes made to \verb|p| are lost. In this
+context, \verb|fexec| is equivalent to setting \verb|tokend| to the next
+character to process.
+
+\item \verb|fgoto <label>;| -- Jump to an entry point defined by
+\verb|<label>|. The \verb|fgoto| statement immediately transfers control to
+the destination state.
+
+\item \verb|fgoto *<expr>;| -- Jump to an entry point given by \verb|<expr>|.
+The expression must evaluate to an integer value representing a state.
+
+\item \verb|fnext <label>;| -- Set the next state to be the entry point defined
+by \verb|label|. The \verb|fnext| statement does not immediately jump to the
+specified state. Any action code following the statement is executed.
+
+\item \verb|fnext *<expr>;| -- Set the next state to be the entry point given
+by \verb|<expr>|. The expression must evaluate to an integer value representing
+a state.
+
+\item \verb|fcall <label>;| -- Push the target state and jump to the entry
+point defined by \verb|<label>|. The next \verb|fret| will jump to the target
+of the transition on which the call was made. Use of \verb|fcall| requires
+the declaration of a call stack. An array of integers named \verb|stack| and a
+single integer named \verb|top| must be declared. With the \verb|fcall|
+construct, control is immediately transferred to the destination state.
+
+\item \verb|fcall *<expr>;| -- Push the current state and jump to the entry
+point given by \verb|<expr>|. The expression must evaluate to an integer value
+representing a state.
+
+\item \verb|fret;| -- Return to the target state of the transition on which the
+last \verb|fcall| was made. Use of \verb|fret| requires the declaration of a
+call stack (the \verb|stack| and \verb|top| variables). Control is immediately
+transferred to the destination state.
+
+\item \verb|fbreak;| -- Save the current state and immediately break out of the
+execute loop. This statement is useful in conjunction with the \verb|noend|
+write option. Rather than process input until the end marker of the input
+buffer is arrived at, the fbreak statement can be used to stop processing input
+upon seeing some end-of-string marker. It can also be used for handling
+exceptional circumstances. The fbreak statement does not change the pointer to
+the current character. After an \verb|fbreak| call the \verb|p| variable will point to
+the character that was being traversed over when the action was
+executed. The current state will be the target of the current transition.
+
+\end{itemize}
+
+\noindent {\bf Note:} Once actions with control-flow commands are embedded into a
+machine, the user must exercise caution when using the machine as the operand
+to other machine construction operators. If an action jumps to another state
+then unioning any transition that executes that action with another transition
+that follows some other path will cause that other path to be lost. Using
+commands that manually jump around a machine takes us out of the domain of
+regular languages because transitions that may be conditional and that the
+machine construction operators are not aware of are introduced. These
+commands should therefore be used with caution.
+
+
+\chapter{Controlling Nondeterminism}
+\label{controlling-nondeterminism}
+
+Along with the flexibility of arbitrary action embeddings comes a need to
+control nondeterminism in regular expressions. If a regular expression is
+ambiguous, then sub-components of a parser other than the intended parts may be
+active at any given time. This means that actions which are irrelevant to the
+current subset of the parser may be executed, causing problems for the
+programmer.
+
+Tools which are based on regular expression engines and which are used for
+recognition tasks will usually function as intended regardless of the presence
+of ambiguities. It is quite common for users of scripting languages to write
+regular expressions that are heavily ambiguous and it generally does not
+matter. As long as one of the potential matches is recognized, there can be any
+number of other matches present.
+
+In some systems, matched text is attributed to a portion of a regular
+expression. Armed with the knowledge that the regular expression engine always
+pursues the longest match or the shortest match, the user is able to compose
+their patterns accordingly.
+
+In Ragel, there is no regular expression run-time engine, just a simple state
+machine execution model. When we begin to embed actions and face the
+possibility of spurious action execution, it becomes clear that controlling
+nondeterminism at the machine construction level is very important. Consider
+the following example.
+
+\verbspace
+\begin{verbatim}
+lines = ( word ( space word )* '\n' )*;
+\end{verbatim}
+\verbspace
+
+Since the \verb|space| built-in expression includes the newline character, we will
+not leave the line expression when a newline character is seen. We will
+simultaneously pursue the possibility of matching further words on the same
+line and the possibility of matching a second line. The solution here is easy:
+simply exclude the newline character from the \verb|space| expression. Solving
+this kind of problem is straightforward because the string that terminates the
+sequence is a single character long. When it is multiple characters long we
+have a more difficult problem, as shown by the following example.
+
+\verbspace
+\begin{verbatim}
+comment = '/*' any* '*/';
+\end{verbatim}
+\verbspace
+
+Using standard concatenation, we will never leave the \verb|any*| expression.
+We will forever entertain the possibility that a \verb|'*/'| string that we see
+is contained in a longer comment and that, simultaneously, the comment has
+ended. One way to approach the problem is to exclude the terminating string
+from the \verb|any*| expression using set difference. We must be careful to
+exclude not just the terminating string, but any string that contains it as a
+substring. A verbose, but proper specification of a C comment parser is given
+by the following regular expression. Note that this operation is the basis of the
+strong subtraction operator.
+
+\verbspace
+\begin{verbatim}
+comment = '/*' ( any* - ( any* '*/' any* ) ) '*/';
+\end{verbatim}
+\verbspace
+
+We can also phrase the problem in terms of the transitions of the state
+machines that implement these expressions. During the concatenation of
+\verb|any*| and \verb|'*/'| we will be making transitions that are composed of
+both the loop of the first expression and the characters of the second.
+At this time we want the transition on the \verb|'/'| character to take precedence
+over and disallow the transition that originated in the \verb|any*| loop.
+
+In another scenario, we wish to implement a lightweight tokenizer that we can
+utilize in the composition of a larger machine. For example, some HTTP headers
+have a token stream as a sub-language.
+
+\verbspace
+\begin{verbatim}
+header_contents = ( lower+ | digit+ | ' ' )*;
+\end{verbatim}
+\verbspace
+
+In this case, the problem with using a standard kleene star operation is that
+there is an ambiguity between extending a token and wrapping around the
+machine to begin a new token. Using the standard operator, we get
+an undesirable nondeterministic behaviour. What is required is for the
+transitions that represent an extension of a token to take precedence over the
+transitions that represent the beginning of a new token. For this problem,
+there is no simple solution that uses standard regular expressions.
+
+\section{Priorities}
+
+A priority mechanism was devised and built into the determinization
+process, specifically for the purpose of allowing the user to control
+nondeterminism. Priorities are integer values embedded into transitions. When
+the determinization process is combining transitions that have different
+priorities, the transition with the higher priority is preserved and the
+transition with the lower priority is dropped.
+
+Unfortunately, priorities can have unintended side effects because their
+operation requires that they linger in transitions indefinitely. They must linger
+because the Ragel program cannot know when the user is finished with a priority
+embedding. A solution whereby they are explicitly deleted after use is
+conceivable; however this is not very user-friendly. Priorities were therefore
+made into named entities. Only priorities with the same name are allowed to
+interact. This allows any number of priorities to coexist in one machine for
+the purpose of controlling various different regular expression operations and
+eliminates the need to ever delete them. Such a scheme allows the user to
+choose a unique name, embed two different priority values using that name
+and be confident that the priority embedding will be free of any side effects.
+
+\section{Priority Assignment}
+
+Priorities are integer values assigned to names within transitions.
+Only priorities with the same name are allowed to interact. When the machine
+construction process is combining transitions that have different priorities
+assigned to the same name, the transition with the higher priority is preserved
+and the lower priority is dropped.
+
+In the first form of priority embedding the name defaults to the name of the machine
+definition that the priority is assigned in. In this sense priorities are by
+default local to the current machine definition or instantiation. Beware of
+using this form in a longest-match machine, since there is only one name for
+the entire set of longest match patterns. In the second form the priority's
+name can be specified, allowing priority interaction across machine definition
+boundaries.
+
+\begin{itemize}
+\setlength{\parskip}{0in}
+\item \verb|expr > int| -- Sets starting transitions to have priority int.
+\item \verb|expr @ int| -- Sets transitions that go into a final state to have priority int.
+\item \verb|expr $ int| -- Sets all transitions to have priority int.
+\item \verb|expr % int| -- Sets pending out transitions from final states to
+have priority int.\\ When a transition is made going out of the machine (either
+by concatenation or kleene star) its priority is immediately set to the pending
+out priority.
+\end{itemize}
+
+The second form of priority assignment allows the programmer to specify the name
+to which the priority is assigned.
+
+\begin{itemize}
+\setlength{\parskip}{0in}
+\item \verb|expr > (name, int)| -- Entering transitions.
+\item \verb|expr @ (name, int)| -- Transitions into final state.
+\item \verb|expr $ (name, int)| -- All transitions.
+\item \verb|expr % (name, int)| -- Pending out transitions.
+\end{itemize}
+
+\section{Guarded Operators that Encapsulate Priorities}
+
+Priorities can be very confusing for the user. They force the user to imagine
+the transitions inside machines and work out the precise effects of regular
+expression operations. When we consider that this problem is worsened by the
+potential for side effects caused by unintended priority name collisions, we
+see that exposing the user to priorities is rather undesirable.
+
+Fortunately, in practice the use of priorities has been necessary only in a
+small number of scenarios. This allows us to encapsulate their functionality
+into a small set of operators and fully hide them from the user. This is
+advantageous from a language design point of view because it greatly simplifies
+the design.
+
+\begin{comment}
+Example from 2 page poster paper.
+% GENERATE: lmkleene
+% %%{
+% machine lmkleene;
+% action id {}
+% action number {}
+% action ws {}
+% action mark {}
+\begin{verbatim}
+main := ( lower+ ':' ' '* <: (
+ ( lower ( lower | digit )* ) >mark %id |
+ digit+ >mark %number |
+ ' '+ >mark %ws
+)** '\n' )*;
+\end{verbatim}
+% }%%
+% END GENERATE
+
+% FIXME: Add
+%\begin{center}
+%\includegraphics[scale=0.4]{lmkleene.ps}
+%\end{center}
+\end{comment}
+
+\subsection{Entry-Guarded Concatenation}
+
+\verb|expr :> expr|
+\verbspace
+
+This operator concatenates two machines, but first assigns a low
+priority to all transitions
+of the first machine and a high priority to the entering transitions of the
+second machine. This operator is useful if from the final states of the first
+machine, it is possible to accept the characters in the start transitions of
+the second machine. This operator effectively terminates the first machine
+immediately upon entering the second machine, where otherwise they would be
+pursued concurrently. In the following example, entry-guarded concatenation is
+used to move out of a machine that matches everything at the first sign of an
+end-of-input marker.
+
+\verbspace
+\begin{verbatim}
+# Leave the catch-all machine on the first character of FIN.
+main := any* :> 'FIN';
+\end{verbatim}
+\verbspace
+
+\begin{center}
+\includegraphics{exstpri}
+\end{center}
+\graphspace
+
+Entry-guarded concatenation is equivalent to the following:
+
+\verbspace
+\begin{verbatim}
+expr $(unique_name,0) . expr >(unique_name,1)
+\end{verbatim}
+
+\subsection{Finish-Guarded Concatenation}
+
+\verb|expr :>> expr|
+\verbspace
+
+This operator is
+like the previous operator, except the higher priority is placed on the final
+transitions of the second machine. This is useful if one wishes to entertain
+the possibility of continuing to match the first machine right up until the
+second machine enters a final state. In other words it terminates the first
+machine only when the second accepts. In the following example, finish-guarded
+concatenation causes the move out of the machine that matches everything to be
+delayed until the full end-of-input marker has been matched.
+
+\verbspace
+\begin{verbatim}
+# Leave the catch-all machine on the last character of FIN.
+main := any* :>> 'FIN';
+\end{verbatim}
+\verbspace
+
+\begin{center}
+\includegraphics{exdonepri}
+\end{center}
+\graphspace
+
+Finish-guarded concatenation is equivalent to the following:
+
+\verbspace
+\begin{verbatim}
+expr $(unique_name,0) . expr @(unique_name,1)
+\end{verbatim}
+
+\subsection{Left-Guarded Concatenation}
+
+\verb|expr <: expr|
+\verbspace
+
+This operator places
+a higher priority on the left expression. It is useful if you want to prefix a
+sequence with another sequence composed of some of the same characters. For
+example, one can consume leading whitespace before tokenizing a sequence of
+whitespace-separated words as in:
+
+\verbspace
+\begin{verbatim}
+( ' '* <: ( ' '+ | [a-z]+ )** )
+\end{verbatim}
+\verbspace
+
+Left-guarded concatenation is equivalent to the following:
+
+\verbspace
+\begin{verbatim}
+expr $(unique_name,1) . expr >(unique_name,0)
+\end{verbatim}
+\verbspace
+
+\subsection{Longest-Match Kleene Star}
+\label{longest_match_kleene_star}
+
+\verb|expr**|
+\verbspace
+
+This version of kleene star puts a higher priority on staying in the
+machine versus wrapping around and starting over. The LM kleene star is useful
+when writing simple tokenizers. These machines are built by applying the
+longest-match kleene star to an alternation of token patterns, as in the
+following.
+
+\verbspace
+\begin{verbatim}
+# Repeat tokens, but make sure to get the longest match.
+main := (
+ lower ( lower | digit )* %A |
+ digit+ %B |
+ ' '
+)**;
+\end{verbatim}
+
+\verbspace
+
+\begin{center}
+\includegraphics{exfinpri}
+\end{center}
+\graphspace
+
+If a regular kleene star were used the machine above would not be able to
+distinguish between extending a word and beginning a new one. This operator is
+equivalent to:
+
+\verbspace
+\begin{verbatim}
+( expr $(unique_name,1) %(unique_name,0) )*
+\end{verbatim}
+\verbspace
+
+When the kleene star is applied, transitions are made out of the machine which
+go back into it. These are assigned a priority of zero by the pending out
+transition mechanism. This is less than the priority of the transitions out of
+the final states that do not leave the machine. When two transitions clash on
+the same character, the differing priorities cause the transition which
+stays in the machine to take precedence. The transition that wraps around is
+dropped.
+
+Note that this operator does not build a scanner in the traditional sense because
+there is never any backtracking. To build a scanner in the traditional sense
+use the Longest-Match machine construction described in Section \ref{generating-scanners}.
+
+\chapter{Interface to Host Program}
+
+\section{Alphtype Statement}
+
+\begin{verbatim}
+alphtype unsigned int;
+\end{verbatim}
+\verbspace
+
+The alphtype statement specifies the alphabet data type that the machine
+operates on. During the compilation of the machine, integer literals are expected to
+be in the range of possible values of the alphtype. Supported alphabet types
+are \verb|char|, \verb|unsigned char|, \verb|short|, \verb|unsigned short|,
+\verb|int|, \verb|unsigned int|, \verb|long|, and \verb|unsigned long|.
+The default is \verb|char|.
+
+\section{Getkey Statement}
+
+\begin{verbatim}
+getkey fpc->id;
+\end{verbatim}
+\verbspace
+
+Specify to Ragel how to retrieve the character that the machine operates on
+from the pointer to the current element (\verb|p|). Any expression that returns
+a value of the alphabet type
+may be used. The getkey statement may be used for looking into element
+structures or for translating the character to process. The getkey expression
+defaults to \verb|(*p)|. In goto-driven machines the getkey expression may be
+evaluated more than once per element processed, therefore it should not incur a
+large cost and preclude optimization.
+
+\section{Access Statement}
+
+\begin{verbatim}
+access fsm->;
+\end{verbatim}
+\verbspace
+
+The access statement allows one to tell Ragel how the generated code should
+access the machine data that is persistent across processing buffer blocks.
+This includes all variables except \verb|p| and \verb|pe|. This includes
+\verb|cs|, \verb|top|, \verb|stack|, \verb|tokstart|, \verb|tokend| and \verb|act|.
+This is useful if a machine is to be encapsulated inside a
+structure in C code. The access statement can be used to give the name of
+a pointer to the structure.
+
+\section{Maintaining Pointers to Input Data}
+
+In the creation of any parser it is not uncommon to require the collection of
+the data being parsed. It is always possible to collect data into a growable
+buffer as the machine moves over it, however the copying of data is a somewhat
+wasteful use of processor cycles. The most efficient way to collect data
+from the parser is to set pointers into the input. This poses a problem for
+uses of Ragel where the input data arrives in blocks, such as over a socket or
+from a file. The program will error if a pointer is set in one buffer block but
+must be used while parsing a following buffer block.
+
+The longest-match constructions exhibit this problem, requiring the maintenance
+code described in Section \ref{generating-scanners}. If a longest-match
+construction has been used somewhere in the machine then it is possible to
+take advantage of the required prefix maintenance code in the driver program to
+ensure pointers to the input are always valid. If laying down a pointer one can
+set \verb|tokstart| at the same spot or ahead of it. When data is shifted in
+between loops the user must also shift the pointer. In this way it is possible
+to maintain pointers to the input that will always be consistent.
+
+\begin{figure}
+\small
+\begin{verbatim}
+ int have = 0;
+ while ( 1 ) {
+ char *p, *pe, *data = buf + have;
+ int len, space = BUFSIZE - have;
+
+ if ( space == 0 ) {
+ fprintf(stderr, "BUFFER OUT OF SPACE\n");
+ exit(1);
+ }
+
+ len = fread( data, 1, space, stdin );
+ if ( len == 0 )
+ break;
+
+ /* Find the last newline by searching backwards. */
+ p = buf;
+ pe = data + len - 1;
+ while ( *pe != '\n' && pe >= buf )
+ pe--;
+ pe += 1;
+
+ %% write exec;
+
+ /* How much is still in the buffer? */
+ have = data + len - pe;
+ if ( have > 0 )
+ memmove( buf, pe, have );
+
+ if ( len < space )
+ break;
+ }
+\end{verbatim}
+\caption{An example of line-oriented processing.}
+\label{line-oriented}
+\end{figure}
+
+In general, there are two approaches for guaranteeing the consistency of
+pointers to input data. The first approach is the one just described;
+lay down a marker from an action,
+then later ensure that the data the marker points to is preserved ahead of
+the buffer on the next execute invocation. This approach is good because it
+allows the parser to decide on the pointer-use boundaries, which can be
+arbitrarily complex parsing conditions. A downside is that it requires any
+pointers that are set to be corrected in between execute invocations.
+
+The alternative is to find the pointer-use boundaries before invoking the execute
+routine, then pass in the data using these boundaries. For example, if the
+program must perform line-oriented processing, the user can scan backwards from
+the end of an input block that has just been read in and process only up to the
+first found newline. On the next input read, the new data is placed after the
+partially read line and processing continues from the beginning of the line.
+An example of line-oriented processing is given in Figure \ref{line-oriented}.
+
+
+\section{Running the Executables}
+
+Ragel is broken down into two executables: a frontend which compiles machines
+and emits them in an XML format, and a backend which generates code or a
+Graphviz Dot file from the XML data. The purpose of the XML-based intermediate
+format is to allow users to inspect their compiled state machines and to
+interface Ragel to other tools such as custom visualizers, code generators or
+analysis tools. The intermediate format will provide a better platform for
+extending Ragel to support new host languages. The split also serves to reduce
+complexity of the Ragel program by strictly separating the data structures and
+algorithms that are used to compile machines from those that are used to
+generate code.
+
+\verbspace
+\begin{verbatim}
+[user@host] myproj: ragel file.rl | rlcodegen -G2 -o file.c
+\end{verbatim}
+
+\section{Choosing a Generated Code Style}
+\label{genout}
+
+There are three styles of code output to choose from. Code style affects the
+size and speed of the compiled binary. Changing code style does not require any
+change to the Ragel program. There are two table-driven formats and a goto
+driven format.
+
+In addition to choosing a style to emit, there are various levels of action
+code reuse to choose from. The maximum reuse levels (\verb|-T0|, \verb|-F0|
+and \verb|-G0|) ensure that no FSM action code is ever duplicated by encoding
+each transition's action list as static data and iterating
+through the lists on every transition. This will normally result in a smaller
+binary. The less action reuse options (\verb|-T1|, \verb|-F1| and \verb|-G1|)
+will usually produce faster running code by expanding each transition's action
+list into a single block of code, eliminating the need to iterate through the
+lists. This duplicates action code instead of generating the logic necessary
+for reuse. Consequently the binary will be larger. However, this tradeoff applies to
+machines with moderate to dense action lists only. If a machine's transitions
+frequently have less than two actions then the less reuse options will actually
+produce both a smaller and a faster running binary due to less action sharing
+overhead. The best way to choose the appropriate code style for your
+application is to perform your own tests.
+
+The table-driven FSM represents the state machine as constant static data. There are
+tables of states, transitions, indices and actions. The current state is
+stored in a variable. The execution is simply a loop that looks up the current
+state, looks up the transition to take, executes any actions and moves to the
+target state. In general, the table-driven FSM can handle any machine, produces
+a smaller binary and requires a less expensive host language compile, but
+results in slower running code. Since the table-driven format is the most
+flexible it is the default code style.
+
+The flat table-driven machine is a table-based machine that is optimized for
+small alphabets. Where the regular table machine uses the current character as
+the key in a binary search for the transition to take, the flat table machine
+uses the current character as an index into an array of transitions. This is
+faster in general, however is only suitable if the span of possible characters
+is small.
+
+The goto-driven FSM represents the state machine using goto and switch
+statements. The execution is a flat code block where the transition to take is
+computed using switch statements and directly executable binary searches. In
+general, the goto FSM produces faster code but results in a larger binary and a
+more expensive host language compile.
+
+The goto-driven format has an additional action reuse level (\verb|-G2|) that
+writes actions directly into the state transitioning logic rather than putting
+all the actions together into a single switch. Generally this produces faster
+running code because it allows the machine to encode the current state using
+the processor's instruction pointer. Again, sparse machines may actually
+compile to smaller binaries when \verb|-G2| is used due to less state and
+action management overhead. For many parsing applications \verb|-G2| is the
+preferred output format.
+
+\verbspace
+\begin{center}
+\begin{tabular}{|c|c|}
+\hline
+\multicolumn{2}{|c|}{\bf Code Output Style Options} \\
+\hline
+\verb|-T0|&binary search table-driven\\
+\hline
+\verb|-T1|&binary search, expanded actions\\
+\hline
+\verb|-F0|&flat table-driven\\
+\hline
+\verb|-F1|&flat table, expanded actions\\
+\hline
+\verb|-G0|&goto-driven\\
+\hline
+\verb|-G1|&goto, expanded actions\\
+\hline
+\verb|-G2|&goto, in-place actions\\
+\hline
+\end{tabular}
+\end{center}
+
+\section{Graphviz}
+
+Ragel is able to emit compiled state machines in Graphviz's Dot file format.
+Graphviz support allows users to perform
+incremental visualization of their parsers. User actions are displayed on
+transition labels of the graph. If the final graph is too large to be
+meaningful, or even drawn, the user is able to inspect portions of the parser
+by naming particular regular expression definitions with the \verb|-S| and
+\verb|-M| options to the \verb|ragel| program. Use of Graphviz greatly
+improves the Ragel programming experience. It allows users to learn Ragel by
+experimentation and also to track down bugs caused by unintended
+nondeterminism.
+
+\end{document}
diff --git a/doc/ragel.1.in b/doc/ragel.1.in
new file mode 100644
index 0000000..cdae3e9
--- /dev/null
+++ b/doc/ragel.1.in
@@ -0,0 +1,561 @@
+.\"
+.\" Copyright 2001-2005 Adrian Thurston <thurston@cs.queensu.ca>
+.\"
+
+.\" This file is part of Ragel.
+.\"
+.\" Ragel is free software; you can redistribute it and/or modify
+.\" it under the terms of the GNU General Public License as published by
+.\" the Free Software Foundation; either version 2 of the License, or
+.\" (at your option) any later version.
+.\"
+.\" Ragel is distributed in the hope that it will be useful,
+.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
+.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+.\" GNU General Public License for more details.
+.\"
+.\" You should have received a copy of the GNU General Public License
+.\" along with Ragel; if not, write to the Free Software
+.\" Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+.\" Process this file with
+.\" groff -man -Tascii ragel.1
+.\"
+.TH RAGEL 1 "@PUBDATE@" "Ragel @VERSION@" "Ragel State Machine Compiler"
+.SH NAME
+ragel \- compile regular languages into executable state machines
+.SH SYNOPSIS
+.B ragel
+.RI [ options ]
+.I file
+.SH DESCRIPTION
+.B Note:
+this is the frontend component of Ragel, which generates an intermediate
+file format that must be processed by rlcodegen(1).
+
+Ragel compiles finite state machines from regular languages into executable
+code. Ragel can generate C, C++, Objective-C, D, or Java code. Ragel state
+machines can not only recognize byte
+sequences as regular expression machines do, but can also execute code at
+arbitrary points in the recognition of a regular language. User code is
+embedded using inline operators that do not disrupt the regular language
+syntax.
+
+The core language consists of standard regular expression operators, such as
+union, concatenation and kleene star, accompanied by action embedding
+operators. Ragel also provides operators that let you control any
+non-determinism that you create, construct scanners using the longest match
+paradigm, and build state machines using the statechart model. It is also
+possible to influence the execution of a state machine from inside an embedded
+action by jumping or calling to other parts of the machine and reprocessing
+input.
+
+Ragel provides a very flexible interface to the host language that attempts to
+place minimal restrictions on how the generated code is used and integrated
+into the application. The generated code has no dependencies.
+
+.SH OPTIONS
+.TP
+.BR \-h ", " \-H ", " \-? ", " \-\-help
+Display help and exit.
+.TP
+.B \-o " file"
+Write output to file. If -o is not given, a default file name is chosen by
+replacing the suffix of the input. For source files ending in .rh the suffix .h
+is used. For all other source files a suffix based on the output language
+is used (.c, .cpp, .m, .dot)
+.TP
+.B \-n
+Do not perform state minimization.
+.TP
+.B \-m
+Perform minimization once, at the end of the state machine compilation.
+.TP
+.B \-l
+Minimize after nearly every operation. Lists of like operations such as unions
+are minimized once at the end. This is the default minimization option.
+.TP
+.B \-e
+Minimize after every operation.
+.TP
+.B \-S <spec>
+FSM specification to output for -V
+.TP
+.B \-M <machine>
+Machine definition/instantiation to output for -V
+.TP
+.B \-C
+The host language is C, C++, Obj-C or Obj-C++. This is the default host language option.
+.TP
+.B \-D
+The host language is D.
+.TP
+.B \-J
+The host language is Java.
+.SH RAGEL INPUT
+NOTE: This is a very brief description of Ragel input. Ragel is described in
+more detail in the user guide available from the homepage (see below).
+
+Ragel normally passes input files straight to the output. When it sees an FSM
+specification that contains machine instantiations it stops to generate the
+state machine. If there are write statements (such as "write exec") then ragel emits the
+corresponding code. There can be any number of FSM specifications in an input
+file. A multi-line FSM specification starts with '%%{' and ends with '}%%'. A
+single line FSM specification starts with %% and ends at the first newline.
+.SH FSM STATEMENTS
+.TP
+.I Machine Name:
+Set the name of the machine. If given, it must be the first statement.
+.TP
+.I Alphabet Type:
+Set the data type of the alphabet.
+.TP
+.I GetKey:
+Specify how to retrieve the alphabet character from the element type.
+.TP
+.I Include:
+Include a machine of same name as the current or of a different name in either
+the current file or some other file.
+.TP
+.I Action Definition:
+Define an action that can be invoked by the FSM.
+.TP
+.I Fsm Definition, Instantiation and Longest Match Instantiation:
+Used to build FSMs. Syntax description in next few sections.
+.TP
+.I Access:
+Specify how to access the persistent state machine variables.
+.TP
+.I Write:
+Write some component of the machine.
+.SH BASIC MACHINES
+The basic machines are the base operands of the regular language expressions.
+.TP
+.I 'hello'
+Concat literal. Produces a concatenation of the characters in the string.
+Supports escape sequences with '\\'. The result will have a start state and a
+transition to a new state for each character in the string. The last state in
+the sequence will be made final. To make the string case-insensitive, append
+an 'i' to the string, as in 'cmd'i\fR.
+.TP
+.I \(dqhello\(dq
+Identical to single quote version.
+.TP
+.I [hello]
+Or literal. Produces a union of characters. Supports character ranges
+with '\-', negating the sense of the union with an initial '^' and escape
+sequences with '\\'. The result will have two states with a transition between
+them for each character or range.
+.LP
+NOTE: '', "", and [] produce null FSMs. Null machines have one state that is
+both a start state and a final state and match the zero length string. A null machine
+may be created with the null builtin machine.
+.TP
+.I integer
+Makes a two state machine with one transition on the given integer number.
+.TP
+.I hex
+Makes a two state machine with one transition on the given hexadecimal number.
+.TP
+.I "/simple_regex/"
+A simple regular expression. Supports the notation '.', '*' and '[]', character
+ranges with '\-', negating the sense of an OR expression with an initial '^'
+and escape sequences with '\\'. Also supports one trailing flag: i. Use it to
+produce a case-insensitive regular expression, as in /GET/i.
+.TP
+.I lit .. lit
+Specifies a range. The allowable upper and lower bounds are concat literals of
+length one and number machines.
+For example, 0x10..0x20, 0..63, and 'a'..'z' are valid ranges.
+.TP
+.I "variable_name"
+References the machine definition assigned to the variable name given.
+.TP
+.I "builtin_machine"
+There are several builtin machines available. They are all two state machines
+for the purpose of matching common classes of characters. They are:
+.RS
+.TP
+.B any
+Any character in the alphabet.
+.TP
+.B ascii
+Ascii characters 0..127.
+.TP
+.B extend
+Ascii extended characters. This is the range -128..127 for signed alphabets
+and the range 0..255 for unsigned alphabets.
+.TP
+.B alpha
+Alphabetic characters /[A-Za-z]/.
+.TP
+.B digit
+Digits /[0-9]/.
+.TP
+.B alnum
+Alpha numerics /[0-9A-Za-z]/.
+.TP
+.B lower
+Lowercase characters /[a-z]/.
+.TP
+.B upper
+Uppercase characters /[A-Z]/.
+.TP
+.B xdigit
+Hexadecimal digits /[0-9A-Fa-f]/.
+.TP
+.B cntrl
+Control characters 0..31.
+.TP
+.B graph
+Graphical characters /[!-~]/.
+.TP
+.B print
+Printable characters /[ -~]/.
+.TP
+.B punct
+Punctuation. Graphical characters that are not alpha-numerics
+/[!-/:-@\\[-`{-~]/.
+.TP
+.B space
+Whitespace /[\\t\\v\\f\\n\\r ]/.
+.TP
+.B null
+Zero length string. Equivalent to '', "" and [].
+.TP
+.B empty
+Empty set. Matches nothing.
+.RE
+.SH BRIEF OPERATOR REFERENCE
+Operators are grouped by precedence, group 1 being the lowest and group 6 the
+highest.
+.LP
+.B GROUP 1:
+.TP
+.I expr , expr
+Join machines together without drawing any transitions, setting up a start
+state or any final states. Start state must be explicitly specified with the
+"start" label. Final states may be specified with epsilon transitions to
+the implicitly created "final" state.
+.LP
+.B GROUP 2:
+.TP
+.I expr | expr
+Produces a machine that matches any string in machine one or machine two.
+.TP
+.I expr & expr
+Produces a machine that matches any string that is in both machine one and
+machine two.
+.TP
+.I expr - expr
+Produces a machine that matches any string that is in machine one but not in
+machine two.
+.LP
+.B GROUP 3:
+.TP
+.I expr . expr
+Produces a machine that matches all the strings in machine one followed
+by all the strings in machine two.
+.LP
+NOTE: Concatenation is the default operator. Two machines next to each other
+with no operator between them results in the concatenation operation.
+.LP
+.B GROUP 4:
+.TP
+.I label: expr
+Attaches a label to an expression. Labels can be used by epsilon transitions
+and fgoto and fcall statements in actions. Also note that the referencing of a
+machine definition causes the implicit creation of a label by the same name.
+.LP
+.B GROUP 5:
+.TP
+.I expr -> label
+Draws an epsilon transition to the state defined by label. Label must
+be a name in the current scope. Epsilon transitions are resolved when
+comma operators are evaluated and at the root of the expression tree of
+machine assignment/instantiation.
+.LP
+.B GROUP 6: Actions
+.LP
+An action may be a name predefined with an action statement or may
+be specified directly with '{' and '}' in the expression.
+.TP
+.I expr > action
+Embeds action into starting transitions.
+.TP
+.I expr @ action
+Embeds action into transitions that go into a final state.
+.TP
+.I expr $ action
+Embeds action into all transitions. Does not include pending out transitions.
+.TP
+.I expr % action
+Embeds action into pending out transitions from final states.
+.LP
+.B GROUP 6: EOF Actions
+.LP
+When a machine's finish routine is called the current state's EOF actions are
+executed.
+.TP
+.I expr >/ action
+Embed an EOF action into the start state.
+.TP
+.I expr </ action
+Embed an EOF action into all states except the start state.
+.TP
+.I expr $/ action
+Embed an EOF action into all states.
+.TP
+.I expr %/ action
+Embed an EOF action into final states.
+.TP
+.I expr @/ action
+Embed an EOF action into all states that are not final.
+.TP
+.I expr <>/ action
+Embed an EOF action into all states that are not the start
+state and that are not final (middle states).
+.LP
+.B GROUP 6: Global Error Actions
+.LP
+Global error actions are stored in states until the final state machine has
+been fully constructed. They are then transferred to error transitions, giving
+the effect of a default action.
+.TP
+.I expr >! action
+Embed a global error action into the start state.
+.TP
+.I expr <! action
+Embed a global error action into all states except the start state.
+.TP
+.I expr $! action
+Embed a global error action into all states.
+.TP
+.I expr %! action
+Embed a global error action into the final states.
+.TP
+.I expr @! action
+Embed a global error action into all states which are not final.
+.TP
+.I expr <>! action
+Embed a global error action into all states which are not the start state and
+are not final (middle states).
+.LP
+.B GROUP 6: Local Error Actions
+.LP
+Local error actions are stored in states until the named machine is fully
+constructed. They are then transferred to error transitions, giving the effect
+of a default action for a section of the total machine. Note that the name may
+be omitted, in which case the action will be transferred to error actions upon
+construction of the current machine.
+.TP
+.I expr >^ action
+Embed a local error action into the start state.
+.TP
+.I expr <^ action
+Embed a local error action into all states except the start state.
+.TP
+.I expr $^ action
+Embed a local error action into all states.
+.TP
+.I expr %^ action
+Embed a local error action into the final states.
+.TP
+.I expr @^ action
+Embed a local error action into all states which are not final.
+.TP
+.I expr <>^ action
+Embed a local error action into all states which are not the start state and
+are not final (middle states).
+.LP
+.B GROUP 6: To-State Actions
+.LP
+To state actions are stored in states and executed any time the machine moves
+into a state. This includes regular transitions, and transfers of control such
+as fgoto. Note that setting the current state from outside the machine (for
+example during initialization) does not count as a transition into a state.
+.TP
+.I expr >~ action
+Embed a to-state action into the start state.
+.TP
+.I expr <~ action
+Embed a to-state action into all states except the start state.
+.TP
+.I expr $~ action
+Embed a to-state action into all states.
+.TP
+.I expr %~ action
+Embed a to-state action into the final states.
+.TP
+.I expr @~ action
+Embed a to-state action into all states which are not final.
+.TP
+.I expr <>~ action
+Embed a to-state action into all states which are not the start state and
+are not final (middle states).
+.LP
+.B GROUP 6: From-State Actions
+.LP
+From state actions are executed whenever a state takes a transition on a character.
+This includes the error transition and a transition to self.
+.TP
+.I expr >* action
+Embed a from-state action into the start state.
+.TP
+.I expr <* action
+Embed a from-state action into every state except the start state.
+.TP
+.I expr $* action
+Embed a from-state action into all states.
+.TP
+.I expr %* action
+Embed a from-state action into the final states.
+.TP
+.I expr @* action
+Embed a from-state action into all states which are not final.
+.TP
+.I expr <>* action
+Embed a from-state action into all states which are not the start state and
+are not final (middle states).
+.LP
+.B GROUP 6: Priority Assignment
+.LP
+Priorities are assigned to names within transitions. Only priorities on the
+same name are allowed to interact. In the first form of priorities the name
+defaults to the name of the machine definition the priority is assigned in.
+Transitions do not have default priorities.
+.TP
+.I expr > int
+Assigns the priority int in all transitions entering into the machine.
+.TP
+.I expr @ int
+Assigns the priority int in all transitions that go into a final state.
+.TP
+.I expr $ int
+Assigns the priority int in all existing transitions.
+.TP
+.I expr % int
+Assigns the priority int in all pending out transitions.
+.LP
+A second form of priority assignment allows the programmer to specify the name
+to which the priority is assigned, allowing interactions to cross machine
+definition boundaries.
+.TP
+.I expr > (name,int)
+Assigns the priority int to name in all transitions entering into the machine.
+.TP
+.I expr @ (name, int)
+Assigns the priority int to name in all transitions that go into a final state.
+.TP
+.I expr $ (name, int)
+Assigns the priority int to name in all existing transitions.
+.TP
+.I expr % (name, int)
+Assigns the priority int to name in all pending out transitions.
+.LP
+.B GROUP 7:
+.TP
+.I expr *
+Produces the kleene star of a machine. Matches zero or more repetitions of the
+machine.
+.TP
+.I expr **
+Longest Matching Kleene Star. This version of kleene star puts a higher
+priority on staying in the machine over wrapping around and starting over. This
+operator is equivalent to ( ( expr ) $1 %0 )*.
+.TP
+.I expr ?
+Produces a machine that accepts the machine given or the null string. This operator
+is equivalent to ( expr | '' ).
+.TP
+.I expr +
+Produces the machine concatenated with the kleene star of itself. Matches one or
+more repetitions of the machine. This operator is equivalent to ( expr . expr* ).
+.TP
+.I expr {n}
+Produces a machine that matches exactly n repetitions of expr.
+.TP
+.I expr {,n}
+Produces a machine that matches anywhere from zero to n repetitions of expr.
+.TP
+.I expr {n,}
+Produces a machine that matches n or more repetitions of expr.
+.TP
+.I expr {n,m}
+Produces a machine that matches n to m repetitions of expr.
+.LP
+.B GROUP 8:
+.TP
+.I ! expr
+Produces a machine that matches any string not matched by the given machine.
+This operator is equivalent to ( extend* - expr ).
+.LP
+.B GROUP 9:
+.TP
+.I ( expr )
+Forces precedence on operators.
+.SH VALUES AVAILABLE IN CODE BLOCKS
+.TP
+.I fc
+The current character. Equivalent to *p.
+.TP
+.I fpc
+A pointer to the current character. Equivalent to p.
+.TP
+.I fcurs
+An integer value representing the current state.
+.TP
+.I ftargs
+An integer value representing the target state.
+.TP
+.I fentry(<label>)
+An integer value representing the entry point <label>.
+.SH STATEMENTS AVAILABLE IN CODE BLOCKS
+.TP
+.I fhold;
+Do not advance over the current character. Equivalent to --p;.
+.TP
+.I fexec <expr>;
+Sets the current character to something else. Equivalent to p = (<expr>)-1;
+.TP
+.I fgoto <label>;
+Jump to the machine defined by <label>.
+.TP
+.I fgoto *<expr>;
+Jump to the entry point given by <expr>. The expression must
+evaluate to an integer value representing a state.
+.TP
+.I fnext <label>;
+Set the next state to be the entry point defined by <label>. The fnext
+statement does not immediately jump to the specified state. Any action code
+following the statement is executed.
+.TP
+.I fnext *<expr>;
+Set the next state to be the entry point given by <expr>. The expression must
+evaluate to an integer value representing a state.
+.TP
+.I fcall <label>;
+Call the machine defined by <label>. The next fret will jump to the
+target of the transition on which the action is invoked.
+.TP
+.I fcall *<expr>;
+Call the entry point given by <expr>. The next fret will jump to the target of
+the transition on which the action is invoked.
+.TP
+.I fret;
+Return to the target state of the transition on which the last fcall was made.
+.TP
+.I fbreak;
+Save the current state and immediately break out of the machine.
+.SH BUGS
+Ragel is still under development and has not yet matured. There are probably
+many bugs.
+.SH CREDITS
+Ragel was written by Adrian Thurston <thurston@cs.queensu.ca>. Objective-C
+output contributed by Eric Ocean. D output contributed by Alan West.
+.SH "SEE ALSO"
+.BR rlcodegen (1),
+.BR re2c (1),
+.BR flex (1)
+
+Homepage: http://www.cs.queensu.ca/home/thurston/ragel/
diff --git a/doc/rlcodegen.1.in b/doc/rlcodegen.1.in
new file mode 100644
index 0000000..516229d
--- /dev/null
+++ b/doc/rlcodegen.1.in
@@ -0,0 +1,107 @@
+.\"
+.\" Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca>
+.\"
+
+.\" This file is part of Ragel.
+.\"
+.\" Ragel is free software; you can redistribute it and/or modify
+.\" it under the terms of the GNU General Public License as published by
+.\" the Free Software Foundation; either version 2 of the License, or
+.\" (at your option) any later version.
+.\"
+.\" Ragel is distributed in the hope that it will be useful,
+.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
+.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+.\" GNU General Public License for more details.
+.\"
+.\" You should have received a copy of the GNU General Public License
+.\" along with Ragel; if not, write to the Free Software
+.\" Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+.\" Process this file with
+.\" groff -man -Tascii rlcodegen.1
+.\"
+.TH RLCODEGEN 1 "@PUBDATE@" "Ragel @VERSION@" "Ragel State Machine Compiler"
+.SH NAME
+rlcodegen \- code generator for Ragel State Machine Compiler
+.SH SYNOPSIS
+.B rlcodegen
+.RI [ options ]
+.I file
+.SH DESCRIPTION
+.B Note:
+this is the backend component of Ragel. This program accepts a machine
+compiled by the frontend program ragel(1) and generates either code or a
+graphviz dot file.
+
+.SH OPTIONS
+.TP
+.BR \-h ", " \-H ", " \-? ", " \-\-help
+Display help and exit.
+.TP
+.B \-o " file"
+Write output to file. If -o is not given, a default file name is chosen by
+replacing the suffix of the input. For source files ending in .rh the suffix .h
+is used. For all other source files a suffix based on the output language
+is used (.c, .cpp, .m, .dot)
+.TP
+.B \-V
+Generate a Graphviz dotfile instead of code. By default this option writes the
+dotfile to standard output. The frontend options -M and -S can be used
+to specify a subset of the grammar to write.
+.TP
+.B \-p
+Print printable characters in Graphviz output.
+.TP
+.B \-T0
+Generate a table driven FSM. This is the default code style. The table driven
+FSM represents the state machine as static data. There are tables of states,
+transitions, indices and actions. The current state is stored in a variable.
+The execution is a loop that, given the current state and the current
+character to process, looks up the transition to take using a binary search,
+executes any actions and moves to the target state. In general, the table
+driven FSM produces a smaller binary and requires a less expensive host language
+compile but results in slower running code. The table driven FSM is suitable
+for any FSM.
+.TP
+.B \-T1
+Generate a faster table driven FSM by expanding action lists in the action
+execute code.
+.TP
+.B \-F0
+Generate a flat table driven FSM. Transitions are represented as an array
+indexed by the current alphabet character. This eliminates the need for a
+binary search to locate transitions and produces faster code, however it is
+only suitable for small alphabets.
+.TP
+.B \-F1
+Generate a faster flat table driven FSM by expanding action lists in the action
+execute code.
+.TP
+.B \-G0
+Generate a goto driven FSM. The goto driven FSM represents the state machine
+as a series of goto statements. While in the machine, the current state is
+stored by the processor's instruction pointer. The execution is a flat function
+where control is passed from state to state using gotos. In general, the goto
+FSM produces faster code but results in a larger binary and a more expensive
+host language compile.
+.TP
+.B \-G1
+Generate a faster goto driven FSM by expanding action lists in the action
+execute code.
+.TP
+.B \-G2
+Generate a really fast goto driven FSM by embedding action lists in the state
+machine control code.
+.SH BUGS
+Ragel is still under development and has not yet matured. There are probably
+many bugs.
+.SH CREDITS
+Ragel was written by Adrian Thurston <thurston@cs.queensu.ca>. Objective-C
+output contributed by Eric Ocean. D output contributed by Alan West.
+.SH "SEE ALSO"
+.BR ragel (1),
+.BR re2c (1),
+.BR flex (1)
+
+Homepage: http://www.cs.queensu.ca/home/thurston/ragel/
diff --git a/doc/stembed.fig b/doc/stembed.fig
new file mode 100644
index 0000000..eb3ce8d
--- /dev/null
+++ b/doc/stembed.fig
@@ -0,0 +1,72 @@
+#FIG 3.2 Produced by xfig version 3.2.5-alpha5
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 463 1772 463 1875
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 955 1772 955 1875
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 1461 1772 1461 1875
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 1948 1772 1948 1875
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 2403 1772 2403 1875
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 2906 1772 2906 1875
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 3377 173 3510 173
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 3377 881 3510 881
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 3377 532 3510 532
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 3377 1609 3510 1609
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 3377 1260 3510 1260
+4 0 0 50 -1 12 12 0.0000 4 105 240 405 225 >~\001
+4 0 0 50 -1 0 12 0.0000 4 150 1545 3690 585 from-state actions\001
+4 0 0 50 -1 0 12 0.0000 4 150 1290 3690 225 to state actions\001
+4 0 0 50 -1 0 12 0.0000 4 150 1545 3690 1665 local error actions\001
+4 0 0 50 -1 0 12 0.0000 4 150 1095 3690 1305 error actions\001
+4 0 0 50 -1 0 12 0.0000 4 150 1065 3690 945 EOF actions\001
+4 0 0 50 -1 0 12 5.6723 4 120 855 405 2044 start state\001
+4 0 0 50 -1 0 12 5.6723 4 150 360 1409 2071 final\001
+4 0 0 50 -1 0 12 5.6723 4 150 750 901 2038 all states\001
+4 0 0 50 -1 12 12 0.0000 4 165 240 900 225 $~\001
+4 0 0 50 -1 12 12 0.0000 4 120 240 1395 225 %~\001
+4 0 0 50 -1 12 12 0.0000 4 105 240 1890 225 <~\001
+4 0 0 50 -1 12 12 0.0000 4 135 360 2835 225 <>~\001
+4 0 0 50 -1 12 12 0.0000 4 120 360 405 585 >* \001
+4 0 0 50 -1 12 12 0.0000 4 165 240 900 585 $*\001
+4 0 0 50 -1 12 12 0.0000 4 135 360 2835 585 <>*\001
+4 0 0 50 -1 12 12 0.0000 4 120 240 405 1305 >!\001
+4 0 0 50 -1 12 12 0.0000 4 150 240 405 945 >/\001
+4 0 0 50 -1 12 12 0.0000 4 165 240 900 945 $/\001
+4 0 0 50 -1 12 12 0.0000 4 120 240 1395 585 %*\001
+4 0 0 50 -1 12 12 0.0000 4 150 240 1395 945 %/\001
+4 0 0 50 -1 12 12 0.0000 4 150 240 1890 945 </\001
+4 0 0 50 -1 12 12 0.0000 4 150 240 2340 945 @/\001
+4 0 0 50 -1 12 12 0.0000 4 150 360 2835 945 <>/\001
+4 0 0 50 -1 12 12 0.0000 4 105 240 1890 585 <*\001
+4 0 0 50 -1 12 12 0.0000 4 120 240 1890 1305 <!\001
+4 0 0 50 -1 12 12 0.0000 4 120 240 1395 1305 %!\001
+4 0 0 50 -1 12 12 0.0000 4 165 240 900 1305 $!\001
+4 0 0 50 -1 12 12 0.0000 4 165 240 900 1665 $^\001
+4 0 0 50 -1 12 12 0.0000 4 120 240 405 1665 >^\001
+4 0 0 50 -1 12 12 0.0000 4 135 240 1395 1665 %^\001
+4 0 0 50 -1 12 12 0.0000 4 120 240 1890 1665 <^\001
+4 0 0 50 -1 12 12 0.0000 4 135 240 2340 1665 @^\001
+4 0 0 50 -1 12 12 0.0000 4 135 360 2835 1665 <>^\001
+4 0 0 50 -1 12 12 0.0000 4 135 240 2340 1305 @!\001
+4 0 0 50 -1 12 12 0.0000 4 135 360 2835 1305 <>!\001
+4 0 0 50 -1 12 12 0.0000 4 135 240 2340 585 @*\001
+4 0 0 50 -1 12 12 0.0000 4 135 240 2340 225 @~\001
+4 0 0 50 -1 0 12 5.6723 4 150 1635 2860 2053 not start & not final\001
+4 0 0 50 -1 0 12 5.6723 4 120 705 1883 2050 not start\001
+4 0 0 50 -1 0 12 5.6723 4 150 675 2359 2048 not final\001
diff --git a/examples/Makefile b/examples/Makefile
new file mode 100644
index 0000000..e1e7808
--- /dev/null
+++ b/examples/Makefile
@@ -0,0 +1,37 @@
+#
+# Copyright 2002-2003 Adrian Thurston <thurston@cs.queensu.ca>
+#
+
+# This file is part of Ragel.
+#
+# Ragel is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Ragel is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ragel; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+SUBDIRS = \
+ atoi awkemu clang concurrent format gotocallret mailbox params rlscan \
+ statechart cppscan
+# Each sub-make runs in a subshell; a failed cd aborts instead of running make in this directory.
+all:
+	@for dir in $(SUBDIRS); do (cd $$dir && $(MAKE)) || exit 1; done
+
+ps:
+	@for dir in $(SUBDIRS); do (cd $$dir && $(MAKE) ps) || exit 1; done
+
+clean:
+	@for dir in $(SUBDIRS); do (cd $$dir && $(MAKE) clean) || exit 1; done
+
+distclean:
+	@for dir in $(SUBDIRS); do (cd $$dir && $(MAKE) distclean) || exit 1; done
+
diff --git a/examples/README b/examples/README
new file mode 100644
index 0000000..12773cb
--- /dev/null
+++ b/examples/README
@@ -0,0 +1,40 @@
+
+ Ragel State Machine Compiler -- Examples
+ ========================================
+
+atoi -- Converts a string to an integer.
+
+awkemu -- Performs the basic parsing that the awk program performs on input.
+ The awk equivalent to awkemu is in awkemu/awkequiv.awk
+
+clang -- A scanner for a simple C like language. It breaks input up into
+ words, numbers, strings and symbols and strips out whitespace
+ and comments. It is a suitable template for writing a parser
+ that finds a sequence of tokens.
+
+concurrent -- Demonstrates the ability of ragel to produce parsers that
+ perform independent tasks concurrently.
+
+cppscan -- A C++ scanner that uses the longest match scanning method. This
+ example differs from other examples of scanning. Each run of the
+ state machine matches one token. This method results in a
+ smaller state machine since the final kleene star is omitted and
+ therefore every state does not need to get all the transitions
+ of the start state.
+
+format -- Partial printf implementation.
+
+gotocallret -- Demonstrate the use of fgoto, fcall and fret.
+
+mailbox -- Parses unix mailbox files. It breaks files into messages, and
+ messages into headers and body. It demonstrates Ragel's ability
+ to make parsers for structured file formats.
+
+params -- Parses command line arguments.
+
+rlscan -- Lexes Ragel input files.
+
+statechart -- Demonstrate the use of labels, the epsilon operator, and the
+ join operator for creating machines using the named state and
+ transition list paradigm. This implements the same machine as
+ the atoi example.
diff --git a/examples/atoi/Makefile b/examples/atoi/Makefile
new file mode 100644
index 0000000..901de19
--- /dev/null
+++ b/examples/atoi/Makefile
@@ -0,0 +1,21 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: atoi
+
+ps: atoi.ps
+
+atoi: atoi.o
+ g++ -g -o atoi atoi.o
+
+atoi.cpp: atoi.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) atoi.rl | $(RLCODEGEN) -G2 -o atoi.cpp
+
+atoi.o: atoi.cpp
+ g++ -Wall -g -c -O3 -o $@ $<
+
+atoi.ps: atoi.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) atoi.rl | $(RLCODEGEN) -V | dot -Tps > atoi.ps
+
+distclean clean:
+ rm -Rf *.o atoi.cpp atoi atoi.ps
diff --git a/examples/atoi/atoi.rl b/examples/atoi/atoi.rl
new file mode 100644
index 0000000..0d354a0
--- /dev/null
+++ b/examples/atoi/atoi.rl
@@ -0,0 +1,60 @@
+/*
+ * Convert a string to an integer.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+
+using namespace std;
+
+%%{
+ machine atoi;
+ write data noerror;
+}%%
+
+int atoi( char *str )
+{
+	char *p = str;
+	int cs, val = 0;
+	bool neg = false;
+
+	%%{
+		action see_neg {
+			neg = true;
+		}
+
+		action add_digit {
+			val = val * 10 + (fc - '0');
+		}
+
+		main :=
+			( '-'@see_neg | '+' )? ( digit @add_digit )+
+			'\n' @{ fbreak; };
+
+		# Initialize and execute. With noend no end pointer is tested; the fbreak on '\n' stops the run.
+		write init;
+		write exec noend;
+	}%%
+
+	if ( neg )
+		val = -1 * val;
+
+	if ( cs < atoi_first_final )
+		cerr << "atoi: there was an error" << endl;
+
+	return val;
+}
+
+
+#define BUFSIZE 1024
+
+int main()
+{
+ char buf[BUFSIZE];
+ while ( fgets( buf, sizeof(buf), stdin ) != 0 ) {
+ int value = atoi( buf );
+ cout << value << endl;
+ }
+ return 0;
+}
diff --git a/examples/awkemu/Makefile b/examples/awkemu/Makefile
new file mode 100644
index 0000000..5e6ecde
--- /dev/null
+++ b/examples/awkemu/Makefile
@@ -0,0 +1,21 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: awkemu
+
+ps: awkemu.ps
+
+awkemu: awkemu.o
+ gcc -g -o awkemu awkemu.o
+
+awkemu.c: awkemu.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) awkemu.rl | $(RLCODEGEN) -G2 -o awkemu.c
+
+awkemu.ps: awkemu.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) awkemu.rl | $(RLCODEGEN) -V | dot -Tps > awkemu.ps
+
+%.o: %.c
+ gcc -pedantic -Wall -g -c -O3 -o $@ $<
+
+distclean clean:
+ rm -Rf *.o awkemu.c awkemu awkemu.ps
diff --git a/examples/awkemu/awkemu.rl b/examples/awkemu/awkemu.rl
new file mode 100644
index 0000000..6615943
--- /dev/null
+++ b/examples/awkemu/awkemu.rl
@@ -0,0 +1,116 @@
+/*
+ * Perform the basic line parsing of input performed by awk.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+%%{
+ machine awkemu;
+
+ action start_word {
+ ws[nwords] = fpc;
+ }
+
+ action end_word {
+ we[nwords++] = fpc;
+ }
+
+ action start_line {
+ nwords = 0;
+ ls = fpc;
+ }
+
+ action end_line {
+ printf("endline(%i): ", nwords );
+ fwrite( ls, 1, p - ls, stdout );
+ printf("\n");
+
+ for ( i = 0; i < nwords; i++ ) {
+ printf(" word: ");
+ fwrite( ws[i], 1, we[i] - ws[i], stdout );
+ printf("\n");
+ }
+ }
+
+ # Words in a line.
+ word = ^[ \t\n]+;
+
+ # The whitespace separating words in a line.
+ whitespace = [ \t];
+
+ # The components in a line to break up. Either a word or a single char of
+ # whitespace. On the word capture characters.
+ blineElements = word >start_word %end_word | whitespace;
+
+ # Star the break line elements. Just be careful to decrement the leaving
+ # priority as we don't want multiple character identifiers to be treated as
+ # multiple single char identifiers.
+ line = ( blineElements** '\n' ) >start_line @end_line;
+
+ # Any number of lines.
+ main := line*;
+}%%
+
+%% write data noerror nofinal;
+
+#define MAXWORDS 256
+#define BUFSIZE 4096
+char buf[BUFSIZE];
+
+int main()
+{
+	int i, nwords = 0;
+	char *ls = 0;
+	char *ws[MAXWORDS];
+	char *we[MAXWORDS];
+
+	int cs;
+	int have = 0;
+
+	%% write init;
+
+	while ( 1 ) {
+		char *p, *pe, *data = buf + have;
+		int len, space = BUFSIZE - have;
+		/* fprintf( stderr, "space: %i\n", space ); */
+
+		if ( space == 0 ) {
+			fprintf(stderr, "buffer out of space\n");
+			exit(1);
+		}
+
+		len = fread( data, 1, space, stdin );
+		/* fprintf( stderr, "len: %i\n", len ); */
+		if ( len == 0 )
+			break;
+
+		/* Find the last newline by searching backwards. This is where
+		 * we will stop processing on this iteration. */
+		p = buf;
+		pe = buf + have + len;
+		/* Test the bound before reading so buf[-1] is never touched. */
+		while ( pe > buf && pe[-1] != '\n' )
+			pe--;
+
+		/* fprintf( stderr, "running on: %i\n", pe - p ); */
+
+		%% write exec;
+
+		/* How much is still in the buffer. */
+		have = data + len - pe;
+		if ( have > 0 )
+			memmove( buf, pe, have );
+
+		/* fprintf(stderr, "have: %i\n", have ); */
+
+		if ( len < space )
+			break;
+	}
+
+	if ( have > 0 )
+		fprintf(stderr, "input not newline terminated\n");
+	return 0;
+}
diff --git a/examples/awkemu/awkequiv.awk b/examples/awkemu/awkequiv.awk
new file mode 100755
index 0000000..9877dd3
--- /dev/null
+++ b/examples/awkemu/awkequiv.awk
@@ -0,0 +1,10 @@
+#!/usr/bin/awk -f
+#
+
+
+{
+ print "endline(" NF "): " $0
+ for ( i = 1; i <= NF; i++ ) {
+ print " word: " $i
+ }
+}
diff --git a/examples/clang/Makefile b/examples/clang/Makefile
new file mode 100644
index 0000000..d305406
--- /dev/null
+++ b/examples/clang/Makefile
@@ -0,0 +1,21 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: clang
+
+ps: clang.ps
+
+clang: clang.o
+ gcc -g -o clang clang.o
+
+clang.c: clang.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) clang.rl | $(RLCODEGEN) -G2 -o clang.c
+
+clang.ps: clang.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) clang.rl | $(RLCODEGEN) -V | dot -Tps > clang.ps
+
+%.o: %.c
+ gcc -pedantic -Wall -O3 -g -c -o $@ $<
+
+distclean clean:
+ rm -Rf *.o clang.c clang clang.ps
diff --git a/examples/clang/clang.rl b/examples/clang/clang.rl
new file mode 100644
index 0000000..7ecfeef
--- /dev/null
+++ b/examples/clang/clang.rl
@@ -0,0 +1,150 @@
+/*
+ * A mini C-like language scanner.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+%%{
+ machine clang;
+
+ newline = '\n' @{curline += 1;};
+ any_count_line = any | newline;
+
+ # Consume a C comment.
+ c_comment := any_count_line* :>> '*/' @{fgoto main;};
+
+ main := |*
+
+ # Alphanumeric characters or underscore.
+ alnum_u = alnum | '_';
+
+ # Alpha characters or underscore.
+ alpha_u = alpha | '_';
+
+ # Symbols. Upon entering clear the buffer. On all transitions
+ # buffer a character. Upon leaving dump the symbol.
+ ( punct - [_'"] ) {
+ printf( "symbol(%i): %c\n", curline, tokstart[0] );
+ };
+
+ # Identifier. Upon entering clear the buffer. On all transitions
+ # buffer a character. Upon leaving, dump the identifier.
+ alpha_u alnum_u* {
+ printf( "ident(%i): ", curline );
+ fwrite( tokstart, 1, tokend-tokstart, stdout );
+ printf("\n");
+ };
+
+ # Single Quote.
+ sliteralChar = [^'\\] | newline | ( '\\' . any_count_line );
+ '\'' . sliteralChar* . '\'' {
+ printf( "single_lit(%i): ", curline );
+ fwrite( tokstart, 1, tokend-tokstart, stdout );
+ printf("\n");
+ };
+
+ # Double Quote.
+ dliteralChar = [^"\\] | newline | ( '\\' any_count_line );
+ '"' . dliteralChar* . '"' {
+ printf( "double_lit(%i): ", curline );
+ fwrite( tokstart, 1, tokend-tokstart, stdout );
+ printf("\n");
+ };
+
+ # Whitespace is standard ws, newlines and control codes.
+ any_count_line - 0x21..0x7e;
+
+ # Describe both c style comments and c++ style comments. The
+ # priority bump on the terminator of the comments brings us
+ # out of the extend* which matches everything.
+ '//' [^\n]* newline;
+
+ '/*' { fgoto c_comment; };
+
+ # Match an integer. We don't bother clearing the buf or filling it.
+ # The float machine overlaps with int and it will do it.
+ digit+ {
+ printf( "int(%i): ", curline );
+ fwrite( tokstart, 1, tokend-tokstart, stdout );
+ printf("\n");
+ };
+
+ # Match a float. Upon entering the machine clear the buf, buffer
+ # characters on every trans and dump the float upon leaving.
+ digit+ '.' digit+ {
+ printf( "float(%i): ", curline );
+ fwrite( tokstart, 1, tokend-tokstart, stdout );
+ printf("\n");
+ };
+
+ # Match a hex. Upon entering the hex part, clear the buf, buffer characters
+ # on every trans and dump the hex on leaving transitions.
+ '0x' xdigit+ {
+ printf( "hex(%i): ", curline );
+ fwrite( tokstart, 1, tokend-tokstart, stdout );
+ printf("\n");
+ };
+
+ *|;
+}%%
+
+%% write data nofinal;
+
+#define BUFSIZE 128
+
+void scanner()
+{
+ static char buf[BUFSIZE];
+ int cs, act, have = 0, curline = 1;
+ char *tokstart, *tokend = 0;
+ int done = 0;
+
+ %% write init;
+
+ while ( !done ) {
+ char *p = buf + have, *pe;
+ int len, space = BUFSIZE - have;
+
+ if ( space == 0 ) {
+ /* We've used up the entire buffer storing an already-parsed token
+ * prefix that must be preserved. */
+ fprintf(stderr, "OUT OF BUFFER SPACE\n" );
+ exit(1);
+ }
+
+ len = fread( p, 1, space, stdin );
+
+ /* If this is the last buffer, tack on an EOF. */
+ if ( len < space ) {
+ p[len++] = 0;
+ done = 1;
+ }
+
+ pe = p + len;
+ %% write exec;
+
+ if ( cs == clang_error ) {
+ fprintf(stderr, "PARSE ERROR\n" );
+ break;
+ }
+
+ if ( tokstart == 0 )
+ have = 0;
+ else {
+ /* There is a prefix to preserve, shift it over. */
+ have = pe - tokstart;
+ memmove( buf, tokstart, have );
+ tokend = buf + (tokend-tokstart);
+ tokstart = buf;
+ }
+ }
+}
+
+int main()
+{
+ scanner();
+ return 0;
+}
+
diff --git a/examples/concurrent/Makefile b/examples/concurrent/Makefile
new file mode 100644
index 0000000..b9a09f6
--- /dev/null
+++ b/examples/concurrent/Makefile
@@ -0,0 +1,21 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: concurrent
+
+ps: concurrent.ps
+
+concurrent: concurrent.o
+ g++ -g -o concurrent concurrent.o
+
+concurrent.cpp: concurrent.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) concurrent.rl | $(RLCODEGEN) -G2 -o concurrent.cpp
+
+concurrent.ps: concurrent.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) concurrent.rl | $(RLCODEGEN) -V | dot -Tps > concurrent.ps
+
+%.o: %.cpp
+ g++ -Wall -g -c -O3 -o $@ $<
+
+distclean clean:
+ rm -Rf *.o concurrent.cpp concurrent concurrent.ps
diff --git a/examples/concurrent/concurrent.rl b/examples/concurrent/concurrent.rl
new file mode 100644
index 0000000..b70fd5d
--- /dev/null
+++ b/examples/concurrent/concurrent.rl
@@ -0,0 +1,126 @@
+/*
+ * Show off concurrent abilities.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+
+using namespace std;
+
+#define BUFSIZE 2048
+
+struct Concurrent
+{
+ int cur_char;
+ int start_word;
+ int start_comment;
+ int start_literal;
+
+ int cs;
+
+ int init( );
+ int execute( const char *data, int len );
+ int finish( );
+};
+
+%%{
+ machine Concurrent;
+
+ action next_char {
+ cur_char += 1;
+ }
+
+ action start_word {
+ start_word = cur_char;
+ }
+ action end_word {
+ cout << "word: " << start_word <<
+ " " << cur_char-1 << endl;
+ }
+
+ action start_comment {
+ start_comment = cur_char;
+ }
+ action end_comment {
+ cout << "comment: " << start_comment <<
+ " " << cur_char-1 << endl;
+ }
+
+ action start_literal {
+ start_literal = cur_char;
+ }
+ action end_literal {
+ cout << "literal: " << start_literal <<
+ " " << cur_char-1 << endl;
+ }
+
+ # Count characters.
+ chars = ( any @next_char )*;
+
+ # Words are non-whitespace.
+ word = ( any-space )+ >start_word %end_word;
+ words = ( ( word | space ) $1 %0 )*;
+
+ # Finds C style comments.
+ comment = ( '/*' any* :>> '*/' ) >start_comment %end_comment;
+ comments = ( comment | any )**;
+
+ # Finds single quoted strings.
+ literalChar = ( any - ['\\] ) | ( '\\' . any );
+ literal = ('\'' literalChar* '\'' ) >start_literal %end_literal;
+ literals = ( ( literal | (any-'\'') ) $1 %0 )*;
+
+ main := chars | words | comments | literals;
+}%%
+
+%% write data;
+
+int Concurrent::init( )
+{
+ %% write init;
+ cur_char = 0;
+ return 1;
+}
+
+int Concurrent::execute( const char *data, int len )
+{
+ const char *p = data;
+ const char *pe = data + len;
+
+ %% write exec;
+
+ if ( cs == Concurrent_error )
+ return -1;
+ if ( cs >= Concurrent_first_final )
+ return 1;
+ return 0;
+}
+
+int Concurrent::finish( )
+{
+ %% write eof;
+ if ( cs == Concurrent_error )
+ return -1;
+ if ( cs >= Concurrent_first_final )
+ return 1;
+ return 0;
+}
+
+Concurrent concurrent;
+char buf[BUFSIZE];
+
+int main()
+{
+ concurrent.init();
+ while ( 1 ) {
+ int len = fread( buf, 1, BUFSIZE, stdin );
+ concurrent.execute( buf, len );
+ if ( len != BUFSIZE )
+ break;
+ }
+
+ if ( concurrent.finish() <= 0 )
+ cerr << "concurrent: error parsing input" << endl;
+ return 0;
+}
diff --git a/examples/cppscan/Makefile b/examples/cppscan/Makefile
new file mode 100644
index 0000000..6a92c82
--- /dev/null
+++ b/examples/cppscan/Makefile
@@ -0,0 +1,41 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+FLEX = flex
+RE2C = re2c
+
+CFLAGS = -Wall -g -O3
+
+all: cppscan lex-cppscan re2c-cppscan
+
+ps: cppscan.ps
+
+cppscan: cppscan.o
+ g++ -g -o $@ $<
+
+lex-cppscan: lex-cppscan.o
+ g++ -g -o $@ $<
+
+re2c-cppscan: re2c-cppscan.o
+ g++ -g -o $@ $<
+
+cppscan.cpp: cppscan.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) cppscan.rl | $(RLCODEGEN) -G2 -o $@
+
+lex-cppscan.cpp: cppscan.lex
+ $(FLEX) -f -o $@ $<
+
+re2c-cppscan.cpp: cppscan.rec
+ $(RE2C) -s $< > $@
+
+example.cpp: example.rec
+ $(RE2C) -s $< > $@
+
+%.o: %.cpp
+ g++ $(CFLAGS) -c -o $@ $<
+
+cppscan.ps: cppscan.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) cppscan.rl | $(RLCODEGEN) -V | dot -Tps > cppscan.ps
+
+distclean clean:
+ rm -Rf *.o cppscan.cpp cppscan cppscan.ps \
+ lex-cppscan lex-cppscan.cpp re2c-cppscan re2c-cppscan.cpp
diff --git a/examples/cppscan/cppscan.lex b/examples/cppscan/cppscan.lex
new file mode 100644
index 0000000..fb66253
--- /dev/null
+++ b/examples/cppscan/cppscan.lex
@@ -0,0 +1,143 @@
+/*
+ * flex equivalent to cppscan.rl
+ */
+
+%{
+
+#include <stdio.h>
+
+#define TK_Dlit 256
+#define TK_Slit 257
+#define TK_Float 258
+#define TK_Id 259
+#define TK_NameSep 260
+#define TK_Arrow 261
+#define TK_PlusPlus 262
+#define TK_MinusMinus 263
+#define TK_ArrowStar 264
+#define TK_DotStar 265
+#define TK_ShiftLeft 266
+#define TK_ShiftRight 267
+#define TK_IntegerDecimal 268
+#define TK_IntegerOctal 269
+#define TK_IntegerHex 270
+#define TK_EqualsEquals 271
+#define TK_NotEquals 272
+#define TK_AndAnd 273
+#define TK_OrOr 274
+#define TK_MultAssign 275
+#define TK_DivAssign 276
+#define TK_PercentAssign 277
+#define TK_PlusAssign 278
+#define TK_MinusAssign 279
+#define TK_AmpAssign 280
+#define TK_CaretAssign 281
+#define TK_BarAssign 282
+#define TK_DotDotDot 283
+#define TK_Whitespace 284
+#define TK_Comment 285
+
+int line = 1, col = 1;
+
+void token( int tok, char *data, int len )
+{
+ printf( "<%i> ", tok );
+ for ( int i = 0; i < len; i++ )
+ fputc( data[i], stdout );
+ fputc( '\n', stdout );
+
+ /* Count newlines and columns. This code is here mainly for having some
+ * code in the token routine when commenting out the above output during
+ * performance testing. */
+ for ( int i = 0; i < len; i ++ ) {
+ if ( data[i] == '\n' ) {
+ line += 1;
+ col = 1;
+ }
+ else {
+ col += 1;
+ }
+ }
+}
+
+
+%}
+
+%x COMMENT
+
+FRACT_CONST [0-9]*\.[0-9]+|[0-9]+\.
+EXPONENT [eE][+\-]?[0-9]+
+FLOAT_SUFFIX [flFL]
+
+%%
+
+ /* Single and double literals. */
+L?\'([^\'\\\n]|\\.)*\' {
+ token( TK_Slit, yytext, yyleng );
+}
+
+L?\"([^\"\\\n]|\\.)*\" {
+ token( TK_Dlit, yytext, yyleng );
+}
+
+[a-zA-Z_][a-zA-Z0-9_]* {
+ token( TK_Id, yytext, yyleng );
+}
+
+{FRACT_CONST}{EXPONENT}?{FLOAT_SUFFIX}?|[0-9]+{EXPONENT}{FLOAT_SUFFIX}? {
+ token( TK_Float, yytext, yyleng );
+}
+
+(0|[1-9][0-9]*)[ulUL]{0,3} {
+ token( TK_IntegerDecimal, yytext, yyleng );
+}
+
+0[0-9]+[ulUL]{0,2} {
+ token( TK_IntegerOctal, yytext, yyleng );
+}
+
+0x[0-9a-fA-F]+[ulUL]{0,2} {
+ token( TK_IntegerHex, yytext, yyleng );
+}
+
+:: token( TK_NameSep, yytext, yyleng );
+== token( TK_EqualsEquals, yytext, yyleng );
+!= token( TK_NotEquals, yytext, yyleng );
+&& token( TK_AndAnd, yytext, yyleng );
+\|\| token( TK_OrOr, yytext, yyleng );
+\*= token( TK_MultAssign, yytext, yyleng );
+\/= token( TK_DivAssign, yytext, yyleng );
+%= token( TK_PercentAssign, yytext, yyleng );
+\+= token( TK_PlusAssign, yytext, yyleng );
+-= token( TK_MinusAssign, yytext, yyleng );
+&= token( TK_AmpAssign, yytext, yyleng );
+\^= token( TK_CaretAssign, yytext, yyleng ); /* '^' escaped: a bare leading ^ is flex's start-of-line anchor */
+\|= token( TK_BarAssign, yytext, yyleng );
+\+\+ token( TK_PlusPlus, yytext, yyleng );
+-- token( TK_MinusMinus, yytext, yyleng );
+-> token( TK_Arrow, yytext, yyleng );
+->\* token( TK_ArrowStar, yytext, yyleng );
+\.\* token( TK_DotStar, yytext, yyleng );
+\.\.\. token( TK_DotDotDot, yytext, yyleng );
+
+\/\* BEGIN(COMMENT);
+<COMMENT>\*\/ BEGIN(INITIAL);
+<COMMENT>(.|\n) { }
+
+\/\/.*\n {}
+[^!-~]+ {}
+
+[!-/:-@\[-`{-~] token( yytext[0], yytext, yyleng );
+
+%%
+
+int yywrap()
+{
+ /* Once the input is done, no more. */
+ return 1;
+}
+
+int main()
+{
+ yylex();
+}
diff --git a/examples/cppscan/cppscan.rec b/examples/cppscan/cppscan.rec
new file mode 100644
index 0000000..43f297d
--- /dev/null
+++ b/examples/cppscan/cppscan.rec
@@ -0,0 +1,183 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define TK_Dlit 256
+#define TK_Slit 257
+#define TK_Float 258
+#define TK_Id 259
+#define TK_NameSep 260
+#define TK_Arrow 261
+#define TK_PlusPlus 262
+#define TK_MinusMinus 263
+#define TK_ArrowStar 264
+#define TK_DotStar 265
+#define TK_ShiftLeft 266
+#define TK_ShiftRight 267
+#define TK_IntegerDecimal 268
+#define TK_IntegerOctal 269
+#define TK_IntegerHex 270
+#define TK_EqualsEquals 271
+#define TK_NotEquals 272
+#define TK_AndAnd 273
+#define TK_OrOr 274
+#define TK_MultAssign 275
+#define TK_DivAssign 276
+#define TK_PercentAssign 277
+#define TK_PlusAssign 278
+#define TK_MinusAssign 279
+#define TK_AmpAssign 280
+#define TK_CaretAssign 281
+#define TK_BarAssign 282
+#define TK_DotDotDot 283
+#define TK_Whitespace 284
+#define TK_Comment 285
+
+int line = 1, col = 1;
+
+void token( int tok, char *data, int len )
+{
+ printf( "<%i> ", tok );
+ for ( int i = 0; i < len; i++ )
+ fputc( data[i], stdout );
+ fputc( '\n', stdout );
+
+ /* Count newlines and columns. This code is here mainly for having some
+ * code in the token routine when commenting out the above output during
+ * performance testing. */
+ for ( int i = 0; i < len; i ++ ) {
+ if ( data[i] == '\n' ) {
+ line += 1;
+ col = 1;
+ }
+ else {
+ col += 1;
+ }
+ }
+}
+
+#define BUFSIZE 8192
+char buf[BUFSIZE];
+
+void fill( int n )
+{
+ printf("fill(%i)\n", n);
+ exit(1);
+}
+
+int main()
+{
+ char *start, *p = buf, *lim = buf, *marker;
+ int len, have, want, shift;
+ int done = 0;
+
+#define YYCTYPE char
+
+#define YYCURSOR p
+#define YYLIMIT lim
+#define YYMARKER marker
+
+#define YYFILL(n) { \
+ if ( ! done ) { \
+ have = lim-start; \
+ if ( start > buf ) { \
+ shift = start-buf; \
+ memmove( buf, start, have ); \
+ start -= shift; \
+ p -= shift; \
+ lim -= shift; \
+ marker -= shift; \
+ } \
+ want = BUFSIZE - have - 1; \
+ len = fread( lim, 1, want, stdin ); \
+ lim += len; \
+ if ( len < want ) { \
+ *lim++ = 0; \
+ done = 1; \
+ } \
+ } \
+ }
+
+again:
+ start = p;
+
+/*!re2c
+
+ANY = [\000-\377];
+FRACTCONST = ( [0-9]* "." [0-9]+ ) | [0-9]+ ".";
+EXPONENT = [eE] [+\-]? [0-9]+;
+FLOATSUFFIX = [flFL];
+
+ "L"? "\'" ( ANY \ [\'\\\n] | "\\" ANY )* "\'" {
+ token( TK_Slit, start, p-start );
+ goto again;
+ }
+
+ "L"? "\"" ( ANY \ [\"\\\n] | "\\" ANY )* "\"" {
+ token( TK_Dlit, start, p-start );
+ goto again;
+ }
+
+ [a-zA-Z_][a-zA-Z0-9_]* {
+ token( TK_Id, start, p-start );
+ goto again;
+ }
+
+ ( FRACTCONST EXPONENT? FLOATSUFFIX? ) | ( [0-9]+ EXPONENT FLOATSUFFIX? ) {
+ token( TK_Float, start, p-start );
+ goto again;
+ }
+
+
+ ( "0" | [1-9][0-9]* ) [ulUL]* {
+ token( TK_IntegerDecimal, start, p-start );
+ goto again;
+ }
+
+ "0" [0-9]+ [ulUL]* {
+ token( TK_IntegerOctal, start, p-start );
+ goto again;
+ }
+
+ "0x" [0-9a-fA-F]+[ulUL]* {
+ token( TK_IntegerHex, start, p-start );
+ goto again;
+ }
+
+ "::" { token( TK_NameSep, start, p-start ); goto again; }
+ "==" { token( TK_EqualsEquals, start, p-start ); goto again; }
+ "!=" { token( TK_NotEquals, start, p-start ); goto again; }
+ "&&" { token( TK_AndAnd, start, p-start ); goto again; }
+ "||" { token( TK_OrOr, start, p-start ); goto again; }
+ "*=" { token( TK_MultAssign, start, p-start ); goto again; }
+ "/=" { token( TK_DivAssign, start, p-start ); goto again; }
+ "%=" { token( TK_PercentAssign, start, p-start ); goto again; }
+ "+=" { token( TK_PlusAssign, start, p-start ); goto again; }
+ "-=" { token( TK_MinusAssign, start, p-start ); goto again; }
+ "&=" { token( TK_AmpAssign, start, p-start ); goto again; }
+ "^=" { token( TK_CaretAssign, start, p-start ); goto again; }
+ "|=" { token( TK_BarAssign, start, p-start ); goto again; }
+ "++" { token( TK_PlusPlus, start, p-start ); goto again; }
+ "--" { token( TK_MinusMinus, start, p-start ); goto again; }
+ "->" { token( TK_Arrow, start, p-start ); goto again; }
+ "->*" { token( TK_ArrowStar, start, p-start ); goto again; }
+ ".*" { token( TK_DotStar, start, p-start ); goto again; }
+ "..." { token( TK_DotDotDot, start, p-start ); goto again; }
+
+ "/*" { goto comment; }
+ "//" (ANY\"\n")* "\n" { goto again; }
+ [\001-\040\177]+ { goto again; }
+
+ [\041-\057\072-\100\133-\140\173-\176] {
+ token( *start, start, p-start );
+ goto again;
+ }
+ "\000" { return 0; }
+*/
+
+comment:
+/*!re2c
+ "*/" { goto again; }
+ ANY { goto comment; }
+*/
+}
diff --git a/examples/cppscan/cppscan.rl b/examples/cppscan/cppscan.rl
new file mode 100644
index 0000000..5c979eb
--- /dev/null
+++ b/examples/cppscan/cppscan.rl
@@ -0,0 +1,207 @@
+/*
+ * A C++ scanner. Uses the longest match construction.
+ * << <= <<= >> >= >>= are left out since angle brackets are used in templates.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <iostream>
+
+#define TK_Dlit 256
+#define TK_Slit 257
+#define TK_Float 258
+#define TK_Id 259
+#define TK_NameSep 260
+#define TK_Arrow 261
+#define TK_PlusPlus 262
+#define TK_MinusMinus 263
+#define TK_ArrowStar 264
+#define TK_DotStar 265
+#define TK_ShiftLeft 266
+#define TK_ShiftRight 267
+#define TK_IntegerDecimal 268
+#define TK_IntegerOctal 269
+#define TK_IntegerHex 270
+#define TK_EqualsEquals 271
+#define TK_NotEquals 272
+#define TK_AndAnd 273
+#define TK_OrOr 274
+#define TK_MultAssign 275
+#define TK_DivAssign 276
+#define TK_PercentAssign 277
+#define TK_PlusAssign 278
+#define TK_MinusAssign 279
+#define TK_AmpAssign 280
+#define TK_CaretAssign 281
+#define TK_BarAssign 282
+#define TK_DotDotDot 283
+#define TK_Whitespace 284
+#define TK_Comment 285
+
+#define BUFSIZE 16384
+
+/* EOF char used to flush out that last token. This should be a whitespace
+ * token. */
+
+#define LAST_CHAR 0
+
+using std::cerr;
+using std::cout;
+using std::cin;
+using std::endl;
+
+static char buf[BUFSIZE];
+static int line = 1, col = 1;
+static char *tokstart, *tokend;
+static int act, have = 0;
+static int cs;
+
+%%{
+ machine Scanner;
+ write data nofinal;
+
+ # Floating literals.
+ fract_const = digit* '.' digit+ | digit+ '.';
+ exponent = [eE] [+\-]? digit+;
+ float_suffix = [flFL];
+
+ c_comment :=
+ any* :>> '*/'
+ @{ fgoto main; };
+
+ main := |*
+
+ # Single and double literals.
+ ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" )
+ {token( TK_Slit );};
+ ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' )
+ {token( TK_Dlit );};
+
+ # Identifiers
+ ( [a-zA-Z_] [a-zA-Z0-9_]* )
+ {token( TK_Id );};
+
+ # Floating literals.
+ ( fract_const exponent? float_suffix? | digit+ exponent float_suffix? )
+ {token( TK_Float );};
+
+ # Integer decimal. Leading part buffered by float.
+ ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} )
+ {token( TK_IntegerDecimal );};
+
+ # Integer octal. Leading part buffered by float.
+ # Up to three suffix chars (e.g. "ull"), the same limit as the decimal rule.
+ ( '0' [0-9]+ [ulUL]{0,3} )
+ {token( TK_IntegerOctal );};
+
+ # Integer hex. Leading 0 buffered by float.
+ # Up to three suffix chars (e.g. "ULL"), the same limit as the decimal rule.
+ ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,3} ) )
+ {token( TK_IntegerHex );};
+
+ # Only buffer the second item, first buffered by symbol.
+ '::' {token( TK_NameSep );};
+ '==' {token( TK_EqualsEquals );};
+ '!=' {token( TK_NotEquals );};
+ '&&' {token( TK_AndAnd );};
+ '||' {token( TK_OrOr );};
+ '*=' {token( TK_MultAssign );};
+ '/=' {token( TK_DivAssign );};
+ '%=' {token( TK_PercentAssign );};
+ '+=' {token( TK_PlusAssign );};
+ '-=' {token( TK_MinusAssign );};
+ '&=' {token( TK_AmpAssign );};
+ '^=' {token( TK_CaretAssign );};
+ '|=' {token( TK_BarAssign );};
+ '++' {token( TK_PlusPlus );};
+ '--' {token( TK_MinusMinus );};
+ '->' {token( TK_Arrow );};
+ '->*' {token( TK_ArrowStar );};
+ '.*' {token( TK_DotStar );};
+
+ # Three char compounds, first item already buffered.
+ '...' {token( TK_DotDotDot );};
+
+ # Single char symbols.
+ ( punct - [_"'] ) {token( tokstart[0] );};
+
+ # Comments and whitespace.
+ '/*' { fgoto c_comment; };
+ '//' [^\n]* '\n';
+ ( any - 33..126 )+;
+
+ *|;
+}%%
+
+/* Print one token as "<id> text" on its own line, then advance the global
+ * line/col counters over the token's characters. The token text is the range
+ * [tokstart, tokend). */
+void token( int tok )
+{
+ const char *text = tokstart;
+ const int length = tokend - tokstart;
+
+ cout << '<' << tok << "> ";
+ cout.write( text, length );
+ cout << '\n';
+
+ /* Position tracking. Kept mainly so the routine still does some work when
+ * the output above is commented out during performance testing. */
+ for ( const char *c = text; c < text + length; c++ ) {
+ if ( *c != '\n' ) {
+ col += 1;
+ continue;
+ }
+ line += 1;
+ col = 1;
+ }
+}
+
+/* Driver: read stdin in blocks into buf, run the scanner over each block and
+ * carry any partially scanned token over to the front of buf before the next
+ * read. Exits with status 1 when a token does not fit in buf or the machine
+ * reaches its error state. */
+int main()
+{
+ std::ios::sync_with_stdio(false);
+
+ %% write init;
+
+ /* Read and scan one block per iteration until EOF has been handled. */
+ bool done = false;
+ while ( !done ) {
+ /* New data goes after the `have` bytes preserved from the last pass. */
+ char *p = buf + have;
+ int space = BUFSIZE - have;
+
+ if ( space == 0 ) {
+ /* We filled up the buffer trying to scan a token. */
+ cerr << "OUT OF BUFFER SPACE" << endl;
+ exit(1);
+ }
+
+ cin.read( p, space );
+ int len = cin.gcount();
+
+ /* If we see eof then append the EOF char. */
+ if ( len == 0 ) {
+ p[0] = LAST_CHAR, len++;
+ done = true;
+ }
+
+ char *pe = p + len;
+ %% write exec;
+
+ /* Check if we failed. */
+ if ( cs == Scanner_error ) {
+ /* Machine failed before finding a token. */
+ cerr << "PARSE ERROR" << endl;
+ exit(1);
+ }
+
+ /* Now set up the prefix. */
+ if ( tokstart == 0 )
+ have = 0;
+ else {
+ /* There is data that needs to be shifted over. tokend is rebased by
+ * the same distance so it keeps its offset from tokstart. */
+ have = pe - tokstart;
+ memmove( buf, tokstart, have );
+ tokend -= (tokstart-buf);
+ tokstart = buf;
+ }
+ }
+
+ return 0;
+}
diff --git a/examples/format/Makefile b/examples/format/Makefile
new file mode 100644
index 0000000..d5ac829
--- /dev/null
+++ b/examples/format/Makefile
@@ -0,0 +1,21 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: format
+
+ps: format.ps
+
+format: format.o
+ gcc -g -o format format.o
+
+format.c: format.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) format.rl | $(RLCODEGEN) -G2 -o format.c
+
+format.ps: format.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) format.rl | $(RLCODEGEN) -V | dot -Tps > format.ps
+
+%.o: %.c
+ gcc -Wall -O3 -g -c -o $@ $<
+
+distclean clean:
+ rm -Rf *.o format.c format format.ps
diff --git a/examples/format/format.rl b/examples/format/format.rl
new file mode 100644
index 0000000..ea5fdfb
--- /dev/null
+++ b/examples/format/format.rl
@@ -0,0 +1,191 @@
+/*
+ * Partial printf implementation.
+ */
+
+#define BUFLEN 1024
+#include <stdio.h>
+
+typedef void (*WriteFunc)( char *data, int len );
+
+/* State of the partial printf machine: an output buffer with its flush
+ * callback, the fields of the conversion currently being parsed, and the
+ * current machine state. */
+struct format
+{
+ /* Literal output collected by the emit action; buflen is the fill level. */
+ char buf[BUFLEN+1];
+ int buflen;
+ /* Called with (buf, buflen) when buf is full and at a successful finish. */
+ WriteFunc write;
+
+ /* FL_* bits, width and precision of the conversion being parsed; all three
+ * are reset by the clear action on '%'. */
+ int flags;
+ int width;
+ int prec;
+ /* Current state; compared against format_error / format_first_final. */
+ int cs;
+};
+
+/* Report one completed conversion specifier: print the parsed flags, width
+ * and precision followed by the conversion character and a blank line. */
+void do_conv( struct format *fsm, char c )
+{
+ const int flags = fsm->flags;
+ const int width = fsm->width;
+ const int prec = fsm->prec;
+
+ printf( "flags: %x\n", flags );
+ printf( "width: %i\n", width );
+ printf( "prec: %i\n", prec );
+ printf( "conv: %c\n", c );
+ printf( "\n" );
+}
+
+#define FL_HASH 0x01
+#define FL_ZERO 0x02
+#define FL_DASH 0x04
+#define FL_SPACE 0x08
+#define FL_PLUS 0x10
+
+#define FL_HAS_WIDTH 0x0100
+#define FL_WIDTH_ARG 0x0200
+#define FL_HAS_PREC 0x0400
+#define FL_PREC_ARG 0x0800
+
+#define FL_LEN_H 0x010000
+#define FL_LEN_HH 0x020000
+#define FL_LEN_L 0x040000
+#define FL_LEN_LL 0x080000
+
+%%{
+ machine format;
+ access fsm->;
+
+ action clear {
+ fsm->flags = 0;
+ fsm->width = 0;
+ fsm->prec = 0;
+ }
+
+ # A non-zero number.
+ nznum = [1-9] [0-9]*;
+
+ # Width
+ action width_num { fsm->width = 10 * fsm->width + (fc-'0'); }
+ action width_arg { fsm->flags |= FL_WIDTH_ARG; }
+ action width { fsm->flags |= FL_HAS_WIDTH; }
+ width = ( ( nznum $width_num | '*' @width_arg ) %width )?;
+
+ # Precision
+ action prec_num { fsm->prec = 10 * fsm->prec + (fc-'0'); }
+ action prec_arg { fsm->flags |= FL_PREC_ARG; }
+ action prec { fsm->flags |= FL_HAS_PREC; }
+ precision = ( '.' ( digit* $prec_num %prec | '*' @prec_arg ) )?;
+
+ # Flags
+ action flags_hash { fsm->flags |= FL_HASH; }
+ action flags_zero { fsm->flags |= FL_ZERO; }
+ action flags_dash { fsm->flags |= FL_DASH; }
+ action flags_space { fsm->flags |= FL_SPACE; }
+ action flags_plus { fsm->flags |= FL_PLUS; }
+
+ flags = (
+ '#' @flags_hash |
+ '0' @flags_zero |
+ '-' @flags_dash |
+ ' ' @flags_space |
+ '+' @flags_plus )*;
+
+ action length_h { fsm->flags |= FL_LEN_H; }
+ action length_l { fsm->flags |= FL_LEN_L; }
+ action length_hh { fsm->flags |= FL_LEN_HH; }
+ action length_ll { fsm->flags |= FL_LEN_LL; }
+
+ # Must use leaving transitions on 'h' and 'l' because they are
+ # prefixes for 'hh' and 'll'.
+ length = (
+ 'h' %length_h |
+ 'l' %length_l |
+ 'hh' @length_hh |
+ 'll' @length_ll )?;
+
+ action conversion {
+ do_conv( fsm, fc );
+ }
+
+ conversion = [diouxXcsp] @conversion;
+
+ fmt_spec =
+ '%' @clear
+ flags
+ width
+ precision
+ length
+ conversion;
+
+ action emit {
+ if ( fsm->buflen == BUFLEN ) {
+ fsm->write( fsm->buf, fsm->buflen );
+ fsm->buflen = 0;
+ }
+ fsm->buf[fsm->buflen++] = fc;
+ }
+
+ action finish_ok {
+ if ( fsm->buflen > 0 )
+ fsm->write( fsm->buf, fsm->buflen );
+ }
+ action finish_err {
+ printf("EOF IN FORMAT\n");
+ }
+ action err_char {
+ printf("ERROR ON CHAR: 0x%x\n", fc );
+ }
+
+ main := (
+ [^%] @emit |
+ '%%' @emit |
+ fmt_spec
+ )* @/finish_err %/finish_ok $!err_char;
+}%%
+
+%% write data;
+
+/* Prepare fsm for a new run: empty the output buffer, then let the code
+ * emitted for "write init" set up the machine state. */
+void format_init( struct format *fsm )
+{
+ fsm->buflen = 0;
+ %% write init;
+}
+
+/* Feed the len bytes starting at data to the machine. p and pe are not read
+ * by any visible statement; presumably they delimit the input for the code
+ * emitted by "write exec". */
+void format_execute( struct format *fsm, const char *data, int len )
+{
+ const char *p = data;
+ const char *pe = data + len;
+
+ %% write exec;
+}
+
+/* Run the code emitted for "write eof", then classify the resulting state:
+ * -1 if it is the error state, 1 if it is a final state
+ * (cs >= format_first_final), 0 otherwise. */
+int format_finish( struct format *fsm )
+{
+ %% write eof;
+
+ if ( fsm->cs == format_error )
+ return -1;
+ if ( fsm->cs >= format_first_final )
+ return 1;
+ return 0;
+}
+
+
+#define INPUT_BUFSIZE 2048
+
+struct format fsm;
+char buf[INPUT_BUFSIZE];
+
+/* WriteFunc installed by main: copy len bytes from data to stdout. */
+void write(char *data, int len )
+{
+ fwrite( data, 1, len, stdout );
+}
+
+/* Pump stdin through the format machine one buffer at a time. A short read
+ * ends the input; "FAIL" is printed unless the machine finished in a final
+ * state. */
+int main()
+{
+ int len;
+
+ fsm.write = write;
+ format_init( &fsm );
+
+ do {
+ len = fread( buf, 1, INPUT_BUFSIZE, stdin );
+ format_execute( &fsm, buf, len );
+ } while ( len == INPUT_BUFSIZE );
+
+ if ( format_finish( &fsm ) <= 0 )
+ printf("FAIL\n");
+ return 0;
+}
+
diff --git a/examples/gotocallret/Makefile b/examples/gotocallret/Makefile
new file mode 100644
index 0000000..13f9818
--- /dev/null
+++ b/examples/gotocallret/Makefile
@@ -0,0 +1,21 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: gotocallret
+
+ps: gotocallret.ps
+
+gotocallret: gotocallret.o
+ g++ -g -o gotocallret gotocallret.o
+
+gotocallret.cpp: gotocallret.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) gotocallret.rl | $(RLCODEGEN) -G2 -o gotocallret.cpp
+
+gotocallret.o: gotocallret.cpp
+ g++ -Wall -g -c -O3 -o $@ $<
+
+gotocallret.ps: gotocallret.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) gotocallret.rl | $(RLCODEGEN) -V | dot -Tps > gotocallret.ps
+
+distclean clean:
+ rm -Rf *.o gotocallret.cpp gotocallret gotocallret.ps
diff --git a/examples/gotocallret/gotocallret.rl b/examples/gotocallret/gotocallret.rl
new file mode 100644
index 0000000..84384a9
--- /dev/null
+++ b/examples/gotocallret/gotocallret.rl
@@ -0,0 +1,103 @@
+/*
+ * Demonstrate the use of goto, call and return. This machine expects either a
+ * lower case char or a digit as a command then a space followed by the command
+ * arg. If the command is a char, then the arg must be a string of chars.
+ * If the command is a digit, then the arg must be a string of digits. This
+ * choice is determined by action code, rather than through transition
+ * destinations.
+ */
+
+#include <iostream>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+using namespace std;
+
+/* Parser object for the goto/call/return demonstration machine. */
+struct GotoCallRet
+{
+ /* Command character recorded by the command rule and tested in comm_arg. */
+ char comm;
+ /* cs is the current state. top and stack are not touched by any visible
+ * statement; presumably they back fcall/fret in the generated code. */
+ int cs, top, stack[32];
+
+ /* init always returns 1; execute and finish return -1 (error state),
+ * 1 (final state) or 0 (neither). */
+ int init( );
+ int execute( const char *data, int len );
+ int finish( );
+};
+
+%%{
+ machine GotoCallRet;
+
+ # Error machine, consumes to end of
+ # line, then starts the main line over.
+ garble_line := (
+ (any-'\n')*'\n'
+ ) >{cout << "error: garbling line" << endl;} @{fgoto main;};
+
+ # Look for a string of alphas or of digits,
+ # on anything else, hold the character and return.
+ alp_comm := alpha+ $!{fhold;fret;};
+ dig_comm := digit+ $!{fhold;fret;};
+
+ # Choose which machine to call into based on the command.
+ action comm_arg {
+ if ( comm >= 'a' )
+ fcall alp_comm;
+ else
+ fcall dig_comm;
+ }
+
+ # Specifies command string. Note that the arg is left out.
+ command = (
+ [a-z0-9] @{comm = fc;} ' ' @comm_arg '\n'
+ ) @{cout << "correct command" << endl;};
+
+ # Any number of commands. If there is an
+ # error anywhere, garble the line.
+ main := command* $!{fhold;fgoto garble_line;};
+}%%
+
+%% write data;
+
+/* Set up the machine state through the code emitted for "write init".
+ * Always returns 1. */
+int GotoCallRet::init( )
+{
+ %% write init;
+ return 1;
+}
+
+/* Run the machine over the len bytes starting at data. Returns -1 if the
+ * machine is then in its error state, 1 if it is in a final state
+ * (cs >= GotoCallRet_first_final), and 0 otherwise. */
+int GotoCallRet::execute( const char *data, int len )
+{
+ const char *p = data;
+ const char *pe = data + len;
+
+ %% write exec;
+ if ( cs == GotoCallRet_error )
+ return -1;
+ if ( cs >= GotoCallRet_first_final )
+ return 1;
+ return 0;
+}
+
+/* Run the code emitted for "write eof" and report the outcome with the same
+ * codes as execute: -1 error state, 1 final state, 0 otherwise. */
+int GotoCallRet::finish( )
+{
+ %% write eof;
+ if ( cs == GotoCallRet_error )
+ return -1;
+ if ( cs >= GotoCallRet_first_final )
+ return 1;
+ return 0;
+}
+
+#define BUFSIZE 1024
+
+/* Feed stdin to the machine one fgets() chunk at a time and complain on
+ * stderr if the input did not end in a final state. */
+int main()
+{
+ GotoCallRet gcr;
+ char line[BUFSIZE];
+
+ gcr.init();
+ for (;;) {
+ if ( fgets( line, sizeof(line), stdin ) == 0 )
+ break;
+ gcr.execute( line, strlen(line) );
+ }
+
+ const int status = gcr.finish();
+ if ( status <= 0 )
+ cerr << "gotocallret: error: parsing input" << endl;
+ return 0;
+}
diff --git a/examples/mailbox/Makefile b/examples/mailbox/Makefile
new file mode 100644
index 0000000..94d6680
--- /dev/null
+++ b/examples/mailbox/Makefile
@@ -0,0 +1,16 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: mailbox
+
+mailbox: mailbox.o
+ g++ -g -o mailbox mailbox.o
+
+mailbox.cpp: mailbox.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) mailbox.rl | $(RLCODEGEN) -G2 -o mailbox.cpp
+
+%.o: %.cpp
+ g++ -Wall -g -c -O3 -o $@ $<
+
+distclean clean:
+ rm -Rf *.o mailbox.cpp mailbox mailbox.ps
diff --git a/examples/mailbox/mailbox.rl b/examples/mailbox/mailbox.rl
new file mode 100644
index 0000000..74e3310
--- /dev/null
+++ b/examples/mailbox/mailbox.rl
@@ -0,0 +1,206 @@
+/*
+ * Parses unix mail boxes into headers and bodies.
+ */
+
+#include <iostream>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+using namespace std;
+
+#define BUFSIZE 2048
+
+/* A growable buffer for collecting headers. Storage is obtained with
+ * malloc/realloc in upAllocate and released with free in empty. */
+struct Buffer
+{
+ Buffer() : data(0), allocated(0), length(0) { }
+ ~Buffer() { empty(); }
+
+ /* Append one byte, growing the storage to twice the new length when the
+ * current allocation is too small. */
+ void append( char p ) {
+ if ( ++length > allocated )
+ upAllocate( length*2 );
+ data[length-1] = p;
+ }
+
+ /* Forget the contents but keep the allocation for reuse. */
+ void clear() { length = 0; }
+ /* Resize the storage to len bytes. */
+ void upAllocate( int len );
+ /* Free the storage and reset all fields. */
+ void empty();
+
+ /* data points at `allocated` bytes, of which the first `length` are used. */
+ char *data;
+ int allocated;
+ int length;
+};
+
+
+/* Parser object for the mailbox machine. */
+struct MailboxScanner
+{
+ /* Header name collected by bufHeadName and examined in onHeader. */
+ Buffer headName;
+ /* Header value collected by hchar/hspace and printed in hfinish. */
+ Buffer headContent;
+
+ /* cs is the current state. top and stack are not touched by any visible
+ * statement; presumably they back fcall/fret in the generated code. */
+ int cs, top, stack[1];
+
+ /* init always returns 1; execute and finish return -1 (error state),
+ * 1 (final state) or 0 (neither). */
+ int init( );
+ int execute( const char *data, int len );
+ int finish( );
+};
+
+%%{
+ machine MailboxScanner;
+
+ # Buffer the header names.
+ action bufHeadName { headName.append(fc); }
+
+ # Prints a blank line after the end of the headers of each message.
+ action blankLine { cout << endl; }
+
+ # Helpers we will use in matching the date section of the from line.
+ day = /[A-Z][a-z][a-z]/;
+ month = /[A-Z][a-z][a-z]/;
+ year = /[0-9][0-9][0-9][0-9]/;
+ time = /[0-9][0-9]:[0-9][0-9]/ . ( /:[0-9][0-9]/ | '' );
+ letterZone = /[A-Z][A-Z][A-Z]/;
+ numZone = /[+\-][0-9][0-9][0-9][0-9]/;
+ zone = letterZone | numZone;
+ dayNum = /[0-9 ][0-9]/;
+
+ # These are the different formats of the date minus an obscure
+ # type that has a funny string 'remote from xxx' on the end. Taken
+ # from c-client in the imap-2000 distribution.
+ date = day . ' ' . month . ' ' . dayNum . ' ' . time . ' ' .
+ ( year | year . ' ' . zone | zone . ' ' . year );
+
+ # From lines separate messages. We will exclude fromLine from a message
+ # body line. This will cause us to stay in message line up until an
+ # entirely correct from line is matched.
+ fromLine = 'From ' . (any-'\n')* . ' ' . date . '\n';
+
+ # The types of characters that can be used as a header name.
+ hchar = print - [ :];
+
+ # Simply eat up an uninteresting header. Return at the first non-ws
+ # character following a newline.
+ consumeHeader := (
+ [^\n] |
+ '\n' [ \t] |
+ '\n' [^ \t] @{fhold; fret;}
+ )*;
+
+ action hchar {headContent.append(fc);}
+ action hspace {headContent.append(' ');}
+
+ action hfinish {
+ headContent.append(0);
+ cout << headContent.data << endl;
+ headContent.clear();
+ fhold;
+ fret;
+ }
+
+ # Display the contents of a header as it is consumed. Collapses line
+ # continuations to a single space.
+ printHeader := (
+ [^\n] @hchar |
+ ( '\n' ( [ \t]+ '\n' )* [ \t]+ ) %hspace
+ )** $!hfinish;
+
+ action onHeader
+ {
+ headName.append(0);
+ if ( strcmp( headName.data, "From" ) == 0 ||
+ strcmp( headName.data, "To" ) == 0 ||
+ strcmp( headName.data, "Subject" ) == 0 )
+ {
+ /* Print the header name, then jump to a machine that will display
+ * the contents. */
+ cout << headName.data << ":";
+ headName.clear();
+ fcall printHeader;
+ }
+
+ headName.clear();
+ fcall consumeHeader;
+ }
+
+ header = hchar+ $bufHeadName ':' @onHeader;
+
+ # Exclude fromLine from a messageLine, otherwise when encountering a
+ # fromLine we will be simultaneously matching the old message and a new
+ # message.
+ messageLine = ( [^\n]* '\n' - fromLine );
+
+ # An entire message.
+ message = ( fromLine . header* . '\n' @blankLine . messageLine* );
+
+ # File is a series of messages.
+ main := message*;
+}%%
+
+%% write data;
+
+/* Set up the machine state through the code emitted for "write init".
+ * Always returns 1. */
+int MailboxScanner::init( )
+{
+ %% write init;
+ return 1;
+}
+
+/* Run the machine over the len bytes starting at data. Returns -1 if the
+ * machine is then in its error state, 1 if it is in a final state
+ * (cs >= MailboxScanner_first_final), and 0 otherwise. */
+int MailboxScanner::execute( const char *data, int len )
+{
+ const char *p = data;
+ const char *pe = data + len;
+
+ %% write exec;
+
+ if ( cs == MailboxScanner_error )
+ return -1;
+ if ( cs >= MailboxScanner_first_final )
+ return 1;
+ return 0;
+}
+
+/* Run the code emitted for "write eof" and report the outcome with the same
+ * codes as execute: -1 error state, 1 final state, 0 otherwise. */
+int MailboxScanner::finish( )
+{
+ %% write eof;
+ if ( cs == MailboxScanner_error )
+ return -1;
+ if ( cs >= MailboxScanner_first_final )
+ return 1;
+ return 0;
+}
+
+
+/* Release the storage, if any, and return the buffer to its freshly
+ * constructed state. Does nothing when no storage is held. */
+void Buffer::empty()
+{
+ if ( data == 0 )
+ return;
+
+ free( data );
+ data = 0;
+ length = 0;
+ allocated = 0;
+}
+
+/* Resize the storage to len bytes, keeping the existing contents. Reports
+ * the failure and exits with status 1 if the allocation fails, instead of
+ * losing the old block and letting the next append write through a null
+ * pointer. */
+void Buffer::upAllocate( int len )
+{
+ /* realloc on a null pointer behaves like malloc, so one call covers both
+ * the first allocation and later growth. */
+ char *grown = (char*) realloc( data, len );
+ if ( grown == 0 ) {
+ cerr << "mailbox: out of memory" << endl;
+ exit(1);
+ }
+ data = grown;
+ allocated = len;
+}
+
+MailboxScanner mailbox;
+char buf[BUFSIZE];
+
+/* Pump stdin through the mailbox scanner one buffer at a time. A short read
+ * ends the input; an error is reported unless the machine finished in a
+ * final state. */
+int main()
+{
+ int len;
+
+ mailbox.init();
+ do {
+ len = fread( buf, 1, BUFSIZE, stdin );
+ mailbox.execute( buf, len );
+ } while ( len == BUFSIZE );
+
+ if ( mailbox.finish() <= 0 )
+ cerr << "mailbox: error parsing input" << endl;
+ return 0;
+}
diff --git a/examples/params/Makefile b/examples/params/Makefile
new file mode 100644
index 0000000..98b950c
--- /dev/null
+++ b/examples/params/Makefile
@@ -0,0 +1,21 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: params
+
+ps: params.ps
+
+params: params.o
+ gcc -g -o params params.o
+
+params.c: params.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) params.rl | $(RLCODEGEN) -G2 -o params.c
+
+params.ps: params.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) params.rl | $(RLCODEGEN) -V | dot -Tps > params.ps
+
+%.o: %.c
+ gcc -Wall -O3 -g -c -o $@ $<
+
+distclean clean:
+ rm -Rf *.o params.c params params.ps
diff --git a/examples/params/params.rl b/examples/params/params.rl
new file mode 100644
index 0000000..3cf908f
--- /dev/null
+++ b/examples/params/params.rl
@@ -0,0 +1,104 @@
+/*
+ * Parse command line arguments.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#define BUFLEN 1024
+
+/* State of the argument parser: the string currently being collected and the
+ * current machine state. */
+struct params
+{
+ /* Argument text gathered by the append action; buflen is the fill level. */
+ char buffer[BUFLEN+1];
+ int buflen;
+ /* Current state; compared against params_error / params_first_final. */
+ int cs;
+};
+
+%%{
+ machine params;
+ access fsm->;
+
+ # A buffer to collect arguments
+
+ # Append to the buffer.
+ action append {
+ if ( fsm->buflen < BUFLEN )
+ fsm->buffer[fsm->buflen++] = fc;
+ }
+
+ # Terminate a buffer. The append action never lets buflen exceed BUFLEN
+ # and buffer holds BUFLEN+1 bytes, so the terminator always fits. Writing
+ # it unconditionally keeps a full (truncated) argument NUL-terminated for
+ # the printf("%s") actions below.
+ action term {
+ fsm->buffer[fsm->buflen] = 0;
+ if ( fsm->buflen < BUFLEN )
+ fsm->buflen += 1;
+ }
+
+ # Clear out the buffer
+ action clear { fsm->buflen = 0; }
+
+ action help { printf("help\n"); }
+ action version { printf("version\n"); }
+ action output { printf("output: \"%s\"\n", fsm->buffer); }
+ action spec { printf("spec: \"%s\"\n", fsm->buffer); }
+ action mach { printf("machine: \"%s\"\n", fsm->buffer); }
+
+ # Helpers that collect strings
+ string = [^\0]+ >clear $append %term;
+
+ # Different arguments.
+ help = ( '-h' | '-H' | '-?' | '--help' ) 0 @help;
+ version = ( '-v' | '--version' ) 0 @version;
+ output = '-o' 0? string 0 @output;
+ spec = '-S' 0? string 0 @spec;
+ mach = '-M' 0? string 0 @mach;
+
+ main := (
+ help |
+ version |
+ output |
+ spec |
+ mach
+ )*;
+}%%
+
+%% write data;
+
+/* Prepare fsm for a new run: empty the argument buffer, then let the code
+ * emitted for "write init" set up the machine state. */
+void params_init( struct params *fsm )
+{
+ fsm->buflen = 0;
+ %% write init;
+}
+
+/* Feed the len bytes starting at data to the machine. main passes each
+ * argument including its terminating NUL, which the grammar uses as the
+ * argument separator. p and pe are not read by any visible statement;
+ * presumably they delimit the input for the code emitted by "write exec". */
+void params_execute( struct params *fsm, const char *data, int len )
+{
+ const char *p = data;
+ const char *pe = data + len;
+
+ %% write exec;
+}
+
+/* Run the code emitted for "write eof", then classify the resulting state:
+ * -1 if it is the error state, 1 if it is a final state
+ * (cs >= params_first_final), 0 otherwise. */
+int params_finish( struct params *fsm )
+{
+ %% write eof;
+
+ if ( fsm->cs == params_error )
+ return -1;
+ if ( fsm->cs >= params_first_final )
+ return 1;
+ return 0;
+}
+
+#define BUFSIZE 2048
+
+/* Run every command line argument, terminating NUL included, through the
+ * params machine and report on stderr if the arguments as a whole were not
+ * accepted. */
+int main( int argc, char **argv )
+{
+ struct params params;
+ char **arg;
+ int status;
+
+ params_init( &params );
+
+ /* argv[0] is the program name, so start at the second entry. */
+ for ( arg = argv + 1; arg < argv + argc; arg++ )
+ params_execute( &params, *arg, strlen(*arg)+1 );
+
+ status = params_finish( &params );
+ if ( status != 1 )
+ fprintf( stderr, "params: error processing arguments\n" );
+
+ return 0;
+}
diff --git a/examples/pullscan/Makefile b/examples/pullscan/Makefile
new file mode 100644
index 0000000..1a048ea
--- /dev/null
+++ b/examples/pullscan/Makefile
@@ -0,0 +1,23 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+CFLAGS = -Wall -g -O3
+
+all: pullscan
+
+ps: pullscan.ps
+
+pullscan: pullscan.o
+ g++ -g -o $@ $<
+
+pullscan.c: pullscan.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) pullscan.rl | $(RLCODEGEN) -G2 -o $@
+
+%.o: %.c
+ gcc $(CFLAGS) -c -o $@ $<
+
+pullscan.ps: pullscan.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) pullscan.rl | $(RLCODEGEN) -V | dot -Tps > pullscan.ps
+
+distclean clean:
+ rm -Rf *.o pullscan.c pullscan pullscan.ps
diff --git a/examples/pullscan/pullscan.rl b/examples/pullscan/pullscan.rl
new file mode 100644
index 0000000..79e3c49
--- /dev/null
+++ b/examples/pullscan/pullscan.rl
@@ -0,0 +1,166 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define BUFSIZE 4096
+
+/* Pull-scanner state. Everything scan() needs to resume is kept here so that
+ * one token can be returned per call. */
+typedef struct _Scanner {
+ /* Scanner state. */
+ int cs; /* current machine state */
+ int act; /* not touched by visible code; presumably used by generated code */
+ int have; /* bytes of a partial token kept at the front of buf */
+ int curline; /* set to 1 by scan_init; not updated in the visible code */
+ char *tokstart; /* start of the token being scanned, 0 when there is none */
+ char *tokend; /* rebased together with tokstart when buf is shifted */
+ char *p; /* saved scan position between calls to scan() */
+ char *pe; /* saved end of valid data between calls to scan() */
+ FILE *file; /* input stream */
+ int done; /* set once fread returns fewer bytes than requested */
+
+ /* Token data */
+ char *data; /* start of the last returned token's text */
+ int len; /* length of that text */
+ int value; /* not used in the visible code */
+
+ char buf[BUFSIZE];
+} Scanner;
+
+
+/* Reset the scanner to all zeroes, then record the starting line number and
+ * the input stream.
+ * NOTE(review): no "write init" is emitted anywhere in the visible code, so
+ * cs, act, tokstart and tokend rely on the memset below. Confirm that state
+ * 0 is the machine's start state for the ragel version in use. */
+void scan_init( Scanner *s, FILE *file )
+{
+ memset (s, '\0', sizeof(Scanner));
+ s->curline = 1;
+ s->file = file;
+}
+
+#define TK_NO_TOKEN (-1)
+#define TK_ERR 128
+#define TK_EOF 129
+#define TK_Identifier 130
+#define TK_Number 131
+
+
+%%{
+ machine Scanner;
+ write data;
+}%%
+
+#define ret_tok( _tok ) token = _tok; s->data = s->tokstart
+
+/* Return the next token from the scanner. Whenever the unscanned range
+ * [p, pe) is empty, more input is read from s->file into s->buf (after moving
+ * any partially scanned token to the front), then the machine is run until an
+ * action records a token with ret_tok. Returns the token id, TK_EOF for the 0
+ * byte appended after the end of the file, or TK_ERR when a token does not
+ * fit in the buffer or the machine reaches its error state. On a token
+ * return, s->data and s->len describe the token text inside s->buf. */
+int scan( Scanner *s )
+{
+ char *p = s->p;
+ char *pe = s->pe;
+ int token = TK_NO_TOKEN;
+ int space, readlen;
+
+ while ( 1 ) {
+ if ( p == pe ) {
+ printf("scanner: need more data\n");
+
+ if ( s->tokstart == 0 )
+ s->have = 0;
+ else {
+ /* There is data that needs to be shifted over. */
+ printf("scanner: buffer broken mid token\n");
+ s->have = pe - s->tokstart;
+ memmove( s->buf, s->tokstart, s->have );
+ s->tokend -= (s->tokstart-s->buf);
+ s->tokstart = s->buf;
+ }
+
+ /* New data goes right after the preserved prefix. */
+ p = s->buf + s->have;
+ space = BUFSIZE - s->have;
+
+ if ( space == 0 ) {
+ /* We filled up the buffer trying to scan a token. */
+ printf("scanner: out of buffer space\n");
+ return TK_ERR;
+ }
+
+ if ( s->done ) {
+ /* Input is exhausted: feed a single 0 byte, which the machine
+ * below turns into TK_EOF. */
+ printf("scanner: end of file\n");
+ p[0] = 0;
+ readlen = 1;
+ }
+ else {
+ /* A short read marks the end of the input. */
+ readlen = fread( p, 1, space, s->file );
+ if ( readlen < space )
+ s->done = 1;
+ }
+
+ pe = p + readlen;
+ }
+
+ %%{
+ machine Scanner;
+ access s->;
+
+ main := |*
+
+ # Identifiers
+ ( [a-zA-Z_] [a-zA-Z0-9_]* ) =>
+ { ret_tok( TK_Identifier ); fbreak; };
+
+ # Whitespace
+ [ \t\n];
+
+ # Number
+ digit+ =>
+ { ret_tok( TK_Number ); fbreak; };
+
+ # EOF
+ 0 =>
+ { ret_tok( TK_EOF ); fbreak; };
+
+ # Anything else
+ any =>
+ { ret_tok( *p ); fbreak; };
+
+ *|;
+
+ write exec;
+ }%%
+
+ if ( s->cs == Scanner_error )
+ return TK_ERR;
+
+ if ( token != TK_NO_TOKEN ) {
+ /* Save p and pe. fbreak does not advance p. */
+ s->p = p + 1;
+ s->pe = pe;
+ /* The token text runs from s->data up to the saved position. */
+ s->len = s->p - s->data;
+ return token;
+ }
+ }
+}
+
+
+int main (int argc, char** argv)
+{
+ Scanner ss;
+ int tok;
+
+ scan_init(&ss, stdin);
+
+ while ( 1 ) {
+ tok = scan (&ss);
+ if ( tok == TK_EOF ) {
+ printf ("parser: EOF\n");
+ break;
+ }
+ else if ( tok == TK_ERR ) {
+ printf ("parser: ERR\n");
+ break;
+ }
+ else {
+ printf ("parser: %d \"", tok);
+ fwrite ( ss.data, 1, ss.len, stdout );
+ printf ("\"\n" );
+ }
+ }
+
+ return 0;
+}
+
+
diff --git a/examples/rlscan/Makefile b/examples/rlscan/Makefile
new file mode 100644
index 0000000..2021d27
--- /dev/null
+++ b/examples/rlscan/Makefile
@@ -0,0 +1,21 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: rlscan
+
+ps: rlscan.ps
+
+rlscan: rlscan.o
+ g++ -g -o rlscan rlscan.o
+
+rlscan.cpp: rlscan.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) rlscan.rl | $(RLCODEGEN) -G2 -o rlscan.cpp
+
+%.o: %.cpp
+ g++ -Wall -g -c -O3 -o $@ $<
+
+rlscan.ps: rlscan.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) rlscan.rl | $(RLCODEGEN) -V | dot -Tps > rlscan.ps
+
+distclean clean:
+ rm -Rf *.o rlscan.cpp rlscan rlscan.ps
diff --git a/examples/rlscan/rlscan.rl b/examples/rlscan/rlscan.rl
new file mode 100644
index 0000000..f912b8d
--- /dev/null
+++ b/examples/rlscan/rlscan.rl
@@ -0,0 +1,298 @@
+/*
+ * Lexes Ragel input files.
+ */
+
+#include <iostream>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+using namespace std;
+
+/* Write the NUL-terminated string at data to cout, replacing '<', '>' and
+ * '&' with their XML entity references. */
+void escapeXML( char *data )
+{
+ for ( ; *data != 0; data++ ) {
+ const char ch = *data;
+ if ( ch == '<' )
+ cout << "&lt;";
+ else if ( ch == '>' )
+ cout << "&gt;";
+ else if ( ch == '&' )
+ cout << "&amp;";
+ else
+ cout << ch;
+ }
+}
+
+/* Write a single character to cout, replacing '<', '>' and '&' with their
+ * XML entity references. */
+void escapeXML( char c )
+{
+ const char *entity = 0;
+
+ if ( c == '<' )
+ entity = "&lt;";
+ else if ( c == '>' )
+ entity = "&gt;";
+ else if ( c == '&' )
+ entity = "&amp;";
+
+ if ( entity != 0 )
+ cout << entity;
+ else
+ cout << c;
+}
+
+/* Write the len characters starting at data to cout, replacing '<', '>' and
+ * '&' with their XML entity references. Each character is handed to the
+ * single-character overload, which performs the same substitutions. */
+void escapeXML( char *data, int len )
+{
+ char *const stop = data + len;
+ while ( data != stop ) {
+ const char ch = *data++;
+ escapeXML( ch );
+ }
+}
+
+/* Unescaped output helpers, the counterparts of the escapeXML overloads. */
+
+/* Write a NUL-terminated string to cout as is. */
+inline void write( char *data )
+{
+ cout << data;
+}
+
+/* Write a single character to cout as is. */
+inline void write( char c )
+{
+ cout << c;
+}
+
+/* Write len characters starting at data to cout as is. */
+inline void write( char *data, int len )
+{
+ cout.write( data, len );
+}
+
+
+%%{
+ machine RagelScan;
+
+ word = [a-zA-Z_][a-zA-Z_0-9]*;
+ integer = [0-9]+;
+ hex = '0x' [0-9a-fA-F] [0-9a-fA-F]*;
+
+ default = ^0;
+ EOF = 0;
+
+ # Handles comments in outside code and inline blocks.
+ c_comment :=
+ ( default* :>> '*/' )
+ ${ escapeXML( fc ); }
+ @{ fret; };
+
+ action emit {
+ escapeXML( tokstart, tokend-tokstart );
+ }
+
+ #
+ # Inline action code
+ #
+
+ ilscan := |*
+
+ "'" ( [^'\\] | /\\./ )* "'" => emit;
+ '"' ( [^"\\] | /\\./ )* '"' => emit;
+ '/*' {
+ write( "/*" );
+ fcall c_comment;
+ };
+ '//' [^\n]* '\n' => emit;
+
+ '{' {
+ write( '{' );
+ inline_depth += 1;
+ };
+
+ '}' {
+ write( '}' );
+ /* If dropping down to the last } then return
+ * to ragel code. */
+ if ( --inline_depth == 0 ) {
+ write( "</inline>\n" );
+ fgoto rlscan;
+ }
+ };
+
+ default => { escapeXML( *tokstart ); };
+ *|;
+
+ #
+ # Ragel Tokens
+ #
+
+ rlscan := |*
+ '}%%' {
+ if ( !single_line ) {
+ write( "</section>\n" );
+ fgoto main;
+ }
+ };
+
+ '\n' {
+ if ( single_line ) {
+ write( "</section>\n" );
+ fgoto main;
+ }
+ };
+
+ # Word
+ word {
+ write( "<word>" );
+ write( tokstart, tokend-tokstart );
+ write( "</word>\n" );
+ };
+
+ # Decimal integer.
+ integer {
+ write( "<int>" );
+ write( tokstart, tokend-tokstart );
+ write( "</int>\n" );
+ };
+
+ # Hexadecimal integer.
+ hex {
+ write( "<hex>" );
+ write( tokstart, tokend-tokstart );
+ write( "</hex>\n" );
+ };
+
+ # Consume comments.
+ '#' [^\n]* '\n';
+
+ # Single literal string.
+ "'" ( [^'\\] | /\\./ )* "'" {
+ write( "<single_lit>" );
+ escapeXML( tokstart, tokend-tokstart );
+ write( "</single_lit>\n" );
+ };
+
+ # Double literal string.
+ '"' ( [^"\\] | /\\./ )* '"' {
+ write( "<double_lit>" );
+ escapeXML( tokstart, tokend-tokstart );
+ write( "</double_lit>\n" );
+ };
+
+ # Or literal.
+ '[' ( [^\]\\] | /\\./ )* ']' {
+ write( "<or_lit>" );
+ escapeXML( tokstart, tokend-tokstart );
+ write( "</or_lit>\n" );
+ };
+
+ # Regex Literal.
+ '/' ( [^/\\] | /\\./ ) * '/' {
+ write( "<re_lit>" );
+ escapeXML( tokstart, tokend-tokstart );
+ write( "</re_lit>\n" );
+ };
+
+ # Open an inline block
+ '{' {
+ inline_depth = 1;
+ write( "<inline>{" );
+ fgoto ilscan;
+ };
+
+ punct {
+ write( "<symbol>" );
+ escapeXML( fc );
+ write( "</symbol>\n" );
+ };
+
+ default;
+ *|;
+
+ #
+ # Outside code.
+ #
+
+ main := |*
+
+ "'" ( [^'\\] | /\\./ )* "'" => emit;
+ '"' ( [^"\\] | /\\./ )* '"' => emit;
+
+ '/*' {
+ escapeXML( tokstart, tokend-tokstart );
+ fcall c_comment;
+ };
+
+ '//' [^\n]* '\n' => emit;
+
+ '%%{' {
+ write( "<section>\n" );
+ single_line = false;
+ fgoto rlscan;
+ };
+
+ '%%' {
+ write( "<section>\n" );
+ single_line = true;
+ fgoto rlscan;
+ };
+
+ default {
+ escapeXML( *tokstart );
+ };
+
+ # EOF.
+ EOF;
+ *|;
+}%%
+
+%% write data nofinal;
+
+#define BUFSIZE 2048
+
+/* Driver: read stdin in blocks into inbuf, run the scanner over each block
+ * and carry any partially scanned token over to the front of inbuf before the
+ * next read. Exits with status 1 when a token does not fit in inbuf or the
+ * machine reaches its error state. */
+int main()
+{
+ std::ios::sync_with_stdio(false);
+
+ /* Scanner variables. None of them is assigned by a visible statement
+ * before "write init"; presumably the generated init code sets them. */
+ int cs, act;
+ char *tokstart, *tokend;
+ int stack[1], top;
+
+ static char inbuf[BUFSIZE];
+ /* State shared with the actions: whether the current section was opened
+ * with a bare '%%', and the brace depth inside an inline block. */
+ bool single_line = false;
+ int inline_depth = 0;
+
+ %% write init;
+
+ bool done = false;
+ int have = 0;
+ while ( !done ) {
+ /* How much space is in the buffer? */
+ int space = BUFSIZE - have;
+ if ( space == 0 ) {
+ /* Buffer is full. */
+ cerr << "TOKEN TOO BIG" << endl;
+ exit(1);
+ }
+
+ /* Read in a block. */
+ char *p = inbuf + have;
+ cin.read( p, space );
+ int len = cin.gcount();
+
+ /* Check for EOF. A single 0 byte is fed to the machine in its place. */
+ if ( len == 0 ) {
+ p[0] = 0, len++;
+ done = true;
+ }
+
+ char *pe = p + len;
+ %% write exec;
+
+ if ( cs == RagelScan_error ) {
+ /* Machine failed before finding a token. */
+ cerr << "PARSE ERROR" << endl;
+ exit(1);
+ }
+
+ if ( tokstart == 0 )
+ have = 0;
+ else {
+ /* There is a prefix to preserve, shift it over. tokend keeps its
+ * offset from tokstart. */
+ have = pe - tokstart;
+ memmove( inbuf, tokstart, have );
+ tokend = inbuf + (tokend-tokstart);
+ tokstart = inbuf;
+ }
+ }
+ return 0;
+}
diff --git a/examples/statechart/Makefile b/examples/statechart/Makefile
new file mode 100644
index 0000000..3dec7fd
--- /dev/null
+++ b/examples/statechart/Makefile
@@ -0,0 +1,21 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: statechart
+
+ps: statechart.ps
+
+statechart: statechart.o
+ g++ -g -o statechart statechart.o
+
+statechart.cpp: statechart.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) statechart.rl | $(RLCODEGEN) -G2 -o statechart.cpp
+
+statechart.o: statechart.cpp
+ g++ -Wall -g -c -O3 -o $@ $<
+
+statechart.ps: statechart.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) statechart.rl | $(RLCODEGEN) -V | dot -Tps > statechart.ps
+
+distclean clean:
+ rm -Rf *.o statechart.cpp statechart statechart.ps
diff --git a/examples/statechart/statechart.rl b/examples/statechart/statechart.rl
new file mode 100644
index 0000000..cb99a20
--- /dev/null
+++ b/examples/statechart/statechart.rl
@@ -0,0 +1,114 @@
+/*
+ * Demonstrate the use of labels, the epsilon operator, and the join operator
+ * for creating machines using the named state and transition list paradigm.
+ * This implements the same machine as the atoi example.
+ */
+
+#include <iostream>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+using namespace std;
+
+/* Parser object for the state-chart version of the atoi machine. */
+struct StateChart
+{
+ /* Set by see_neg on a leading '-'; applied to val in the finish action. */
+ bool neg;
+ /* Number accumulated digit by digit in add_digit. */
+ int val;
+ /* Current machine state. */
+ int cs;
+
+ /* init always returns 1; execute and finish return -1 (error state),
+ * 1 (final state) or 0 (neither). */
+ int init( );
+ int execute( const char *data, int len );
+ int finish( );
+};
+
+%%{
+ machine StateChart;
+
+ action begin {
+ neg = false;
+ val = 0;
+ }
+
+ action see_neg {
+ neg = true;
+ }
+
+ action add_digit {
+ val = val * 10 + (fc - '0');
+ }
+
+ action finish {
+ if ( neg )
+ val = -1 * val;
+ }
+
+ atoi = (
+ start: (
+ '-' @see_neg ->om_num |
+ '+' ->om_num |
+ [0-9] @add_digit ->more_nums
+ ),
+
+ # One or more nums.
+ om_num: (
+ [0-9] @add_digit ->more_nums
+ ),
+
+ # Zero or more nums.
+ more_nums: (
+ [0-9] @add_digit ->more_nums |
+ '' -> final
+ )
+ ) >begin %finish;
+
+ main := ( atoi '\n' @{ cout << val << endl; } )*;
+}%%
+
+%% write data;
+
+/* Set up the machine state through the code emitted for "write init".
+ * Always returns 1. */
+int StateChart::init( )
+{
+ %% write init;
+ return 1;
+}
+
+/* Run the machine over the len bytes starting at data. Returns -1 if the
+ * machine is then in its error state, 1 if it is in a final state
+ * (cs >= StateChart_first_final), and 0 otherwise. */
+int StateChart::execute( const char *data, int len )
+{
+ const char *p = data;
+ const char *pe = data + len;
+
+ %% write exec;
+
+ if ( cs == StateChart_error )
+ return -1;
+ if ( cs >= StateChart_first_final )
+ return 1;
+ return 0;
+}
+
+/* Run the code emitted for "write eof" and report the outcome with the same
+ * codes as execute: -1 error state, 1 final state, 0 otherwise. */
+int StateChart::finish( )
+{
+ %% write eof;
+ if ( cs == StateChart_error )
+ return -1;
+ if ( cs >= StateChart_first_final )
+ return 1;
+ return 0;
+}
+
+
+#define BUFSIZE 1024
+
+/* Feed stdin to the machine one fgets() chunk at a time and complain on
+ * stderr if the input did not end in a final state. */
+int main()
+{
+ StateChart atoi;
+ char line[BUFSIZE];
+
+ atoi.init();
+ for (;;) {
+ if ( fgets( line, sizeof(line), stdin ) == 0 )
+ break;
+ atoi.execute( line, strlen(line) );
+ }
+
+ const int status = atoi.finish();
+ if ( status <= 0 )
+ cerr << "statechart: error: parsing input" << endl;
+ return 0;
+}
diff --git a/examples/uri/uri.rl b/examples/uri/uri.rl
new file mode 100644
index 0000000..185a76c
--- /dev/null
+++ b/examples/uri/uri.rl
@@ -0,0 +1,31 @@
+%%{
+ machine uri;
+
+ action scheme {}
+ action loc {}
+ action item {}
+ action query {}
+ action last {}
+ action nothing {}
+
+ main :=
+ # Scheme machine. This is ambiguous with the item machine. We commit
+ # to the scheme machine on colon.
+ ( [^:/?#]+ ':' @(colon,1) @scheme )?
+
+ # Location machine. This is ambiguous with the item machine. We remain
+ # ambiguous until a second slash, at that point and all points after
+ # we place a higher priority on staying in the location machine over
+ # moving into the item machine.
+ ( ( '/' ( '/' [^/?#]* ) $(loc,1) ) %loc %/loc )?
+
+ # Item machine. Ambiguous with both scheme and location, which both
+ # get a higher priority on the characters causing ambiguity.
+ ( ( [^?#]+ ) $(loc,0) $(colon,0) %item %/item )?
+
+ # Last two components, the characters that initiate these machines are
+ # not supported in any previous components, therefore there are no
+ # ambiguities introduced by these parts.
+ ( '?' [^#]* %query %/query)?
+ ( '#' any* %/last )?;
+}%%
diff --git a/ragel.spec b/ragel.spec
new file mode 100644
index 0000000..0dc97a7
--- /dev/null
+++ b/ragel.spec
@@ -0,0 +1,55 @@
+Summary: Ragel State Machine Compiler
+Name: ragel
+Version: 5.16
+Release: 1
+
+URL: http://www.cs.queensu.ca/home/thurston/ragel/
+Vendor: Adrian Thurston
+Packager: Adrian Thurston
+Distribution: Any
+Group: Development/Other
+License: GPL
+
+Source0: http://www.cs.queensu.ca/home/thurston/ragel/%{name}-%{version}.tar.gz
+
+Prefix: /usr
+BuildRoot: %_tmppath/%name-%version-root
+BuildPreReq: gcc, make
+
+%description
+Ragel compiles finite state machines from regular languages into executable C,
+C++, Objective-C or D code. Ragel state machines can not only recognize byte
+sequences as regular expression machines do, but can also execute code at
+arbitrary points in the recognition of a regular language. Using custom
+operators, Ragel allows the user to embed code into a regular language in
+arbitrary places without disrupting the regular language syntax. Ragel also
+provides operators for controlling nondeterminism, constructing machines using
+state charts and building scanners.
+
+%prep
+%setup -q -n %{name}-%{version}
+
+%build
+./configure --prefix=%{prefix}
+make CFLAGS="-O2 -Wall"
+cd doc && make ragel.1 rlcodegen.1
+
+%install
+# Rather than 'make install', let RPM choose where
+# things are kept on this system:
+install -d $RPM_BUILD_ROOT%_bindir
+install -s ragel/ragel $RPM_BUILD_ROOT%_bindir/ragel
+install -s rlcodegen/rlcodegen $RPM_BUILD_ROOT%_bindir/rlcodegen
+install -d $RPM_BUILD_ROOT%_mandir/man1
+install doc/ragel.1 $RPM_BUILD_ROOT%_mandir/man1/ragel.1
+install doc/rlcodegen.1 $RPM_BUILD_ROOT%_mandir/man1/rlcodegen.1
+
+%files
+%defattr(-,root,root)
+%_bindir/ragel
+%_bindir/rlcodegen
+%_mandir/man1/ragel.1
+%_mandir/man1/rlcodegen.1
+
+%clean
+ rm -rf $RPM_BUILD_ROOT
diff --git a/ragel.vim b/ragel.vim
new file mode 100644
index 0000000..4fc9c5b
--- /dev/null
+++ b/ragel.vim
@@ -0,0 +1,161 @@
+" Vim syntax file
+"
+" Language: Ragel
+" Author: Adrian Thurston
+
+syntax clear
+
+"
+" Outside code
+"
+
+" Comments
+syntax region ocComment start="\/\*" end="\*\/"
+syntax match ocComment "\/\/.*$"
+
+" Anything preprocessor
+syntax match ocPreproc "#.*$"
+
+" Strings
+syntax match ocLiteral "'\(\\.\|[^'\\]\)*'"
+syntax match ocLiteral "\"\(\\.\|[^\"\\]\)*\""
+
+" C/C++ Keywords
+syntax keyword ocType unsigned signed void char short int long float double bool
+syntax keyword ocType inline static extern register const volatile auto
+syntax keyword ocType union enum struct class typedef
+syntax keyword ocType namespace template typename mutable
+syntax keyword ocKeyword break continue default do else for
+syntax keyword ocKeyword goto if return switch while
+syntax keyword ocKeyword new delete this using friend public private protected sizeof
+syntax keyword ocKeyword throw try catch operator typeid
+syntax keyword ocKeyword and bitor xor compl bitand and_eq or_eq xor_eq not not_eq
+syntax keyword ocKeyword static_cast dynamic_cast
+
+" D Keywords
+syntax keyword ocType wchar dchar bit byte ubyte ushort uint ulong cent ucent
+syntax keyword ocType cfloat ifloat cdouble idouble real creal ireal
+syntax keyword ocKeyword abstract alias align asm assert body cast debug delegate
+syntax keyword ocKeyword deprecated export final finally foreach function import in inout
+syntax keyword ocKeyword interface invariant is mixin module out override package pragma
+syntax keyword ocKeyword super synchronized typeof unittest version with
+
+" Java Keywords
+syntax keyword ocType byte short char int
+
+" Objective-C Directives
+syntax match ocKeyword "@public\|@private\|@protected"
+syntax match ocKeyword "@interface\|@implementation"
+syntax match ocKeyword "@class\|@end\|@defs"
+syntax match ocKeyword "@encode\|@protocol\|@selector"
+
+" Numbers
+syntax match ocNumber "[0-9][0-9]*"
+syntax match ocNumber "0x[0-9a-fA-F][0-9a-fA-F]*"
+
+" Booleans
+syntax keyword ocBoolean true false
+
+" Identifiers
+syntax match anyId "[a-zA-Z_][a-zA-Z_0-9]*"
+
+" Inline code only
+syntax keyword fsmType fpc fc fcurs fbuf fblen ftargs fstack
+syntax keyword fsmKeyword fhold fgoto fcall fret fentry fnext fexec fbreak
+
+syntax cluster rlItems contains=rlComment,rlLiteral,rlAugmentOps,rlOtherOps,rlKeywords,rlWrite,rlCodeCurly,rlCodeSemi,rlNumber,anyId,rlLabelColon,rlExprKeywords
+
+syntax region machineSpec1 matchgroup=beginRL start="%%{" end="}%%" contains=@rlItems
+syntax region machineSpec2 matchgroup=beginRL start="%%[^{]"rs=e-1 end="$" keepend contains=@rlItems
+syntax region machineSpec2 matchgroup=beginRL start="%%$" end="$" keepend contains=@rlItems
+
+" Comments
+syntax match rlComment "#.*$" contained
+
+" Literals
+syntax match rlLiteral "'\(\\.\|[^'\\]\)*'[i]*" contained
+syntax match rlLiteral "\"\(\\.\|[^\"\\]\)*\"[i]*" contained
+syntax match rlLiteral /\/\(\\.\|[^\/\\]\)*\/[i]*/ contained
+syntax match rlLiteral "\[\(\\.\|[^\]\\]\)*\]" contained
+
+" Numbers
+syntax match rlNumber "[0-9][0-9]*" contained
+syntax match rlNumber "0x[0-9a-fA-F][0-9a-fA-F]*" contained
+
+" Operators
+syntax match rlAugmentOps "[>$%@]" contained
+syntax match rlAugmentOps "<>\|<" contained
+syntax match rlAugmentOps "[>\<$%@][!\^/*~]" contained
+syntax match rlAugmentOps "[>$%]?" contained
+syntax match rlAugmentOps "<>[!\^/*~]" contained
+syntax match rlAugmentOps "=>" contained
+syntax match rlOtherOps "->" contained
+
+syntax match rlOtherOps ":>" contained
+syntax match rlOtherOps ":>>" contained
+syntax match rlOtherOps "<:" contained
+
+" Keywords
+syntax keyword rlKeywords machine action context include range contained
+syntax keyword rlExprKeywords when err lerr eof from to contained
+
+" Case Labels
+syntax keyword caseLabelKeyword case contained
+syntax cluster caseLabelItems contains=ocComment,ocPreproc,ocLiteral,ocType,ocKeyword,caseLabelKeyword,ocNumber,ocBoolean,anyId,fsmType,fsmKeyword
+syntax match caseLabelColon "case" contains=@caseLabelItems
+syntax match caseLabelColon "case[\t ]\+.*:$" contains=@caseLabelItems
+syntax match caseLabelColon "case[\t ]\+.*:[^=:]"me=e-1 contains=@caseLabelItems
+
+" Labels
+syntax match ocLabelColon "^[\t ]*[a-zA-Z_][a-zA-Z_0-9]*[ \t]*:$" contains=anyLabel
+syntax match ocLabelColon "^[\t ]*[a-zA-Z_][a-zA-Z_0-9]*[ \t]*:[^=:]"me=e-1 contains=anyLabel
+
+syntax match rlLabelColon "[a-zA-Z_][a-zA-Z_0-9]*[ \t]*:$" contained contains=anyLabel
+syntax match rlLabelColon "[a-zA-Z_][a-zA-Z_0-9]*[ \t]*:[^=:>]"me=e-1 contained contains=anyLabel
+syntax match anyLabel "[a-zA-Z_][a-zA-Z_0-9]*" contained
+
+" All items that can go in a code block.
+
+syntax cluster inlineItems contains=rlCodeCurly,ocComment,ocPreproc,ocLiteral,ocType,ocKeyword,ocNumber,ocBoolean,ocLabelColon,anyId,fsmType,fsmKeyword,caseLabelColon
+
+" Blocks of code. rlCodeCurly is recursive.
+syntax region rlCodeCurly matchgroup=NONE start="{" end="}" contained contains=@inlineItems
+syntax region rlCodeSemi matchgroup=Type start="\<alphtype\>" start="\<getkey\>" start="\<access\>" start="\<variable\>" matchgroup=NONE end=";" contained contains=@inlineItems
+
+syntax region rlWrite matchgroup=Type start="\<write\>" matchgroup=NONE end=";" contained contains=rlWriteKeywords,rlWriteOptions
+
+syntax keyword rlWriteKeywords init data exec eof contained
+syntax keyword rlWriteOptions noerror nofinal noprefix noend contained
+
+"
+" Sync at the start of machine specs.
+"
+syntax sync match ragelSyncPat grouphere NONE "%%{&"
+syntax sync match ragelSyncPat grouphere NONE "%%[^{]&"
+syntax sync match ragelSyncPat grouphere NONE "}%%"
+
+"
+" Specifying Groups
+"
+hi link ocComment Comment
+hi link ocPreproc Macro
+hi link ocLiteral String
+hi link ocType Type
+hi link ocKeyword Keyword
+hi link ocNumber Number
+hi link ocBoolean Boolean
+hi link rlComment Comment
+hi link rlNumber Number
+hi link rlLiteral String
+hi link rlAugmentOps Keyword
+hi link rlExprKeywords Keyword
+hi link rlWriteKeywords Keyword
+hi link rlWriteOptions Keyword
+hi link rlKeywords Type
+hi link fsmType Type
+hi link fsmKeyword Keyword
+hi link anyLabel Label
+hi link caseLabelKeyword Keyword
+hi link beginRL Type
+
+let b:current_syntax = "ragel"
diff --git a/ragel/Makefile.in b/ragel/Makefile.in
new file mode 100644
index 0000000..9b252a7
--- /dev/null
+++ b/ragel/Makefile.in
@@ -0,0 +1,85 @@
+#
+# Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+#
+
+# This file is part of Ragel.
+#
+# Ragel is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Ragel is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ragel; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+INCS += -I../common -I../aapl
+DEFS +=
+
+CFLAGS += -g -Wall
+LDFLAGS +=
+
+CC_SRCS = \
+ main.cpp parsetree.cpp parsedata.cpp fsmstate.cpp fsmbase.cpp \
+ fsmattach.cpp fsmmin.cpp fsmgraph.cpp fsmap.cpp xmlcodegen.cpp \
+ rlscan.cpp rlparse.cpp
+
+GEN_SRC = rlscan.cpp rlparse.h rlparse.cpp
+
+LIBS += @LIBS@
+PREFIX = @prefix@
+
+BUILD_PARSERS = @BUILD_PARSERS@
+
+#*************************************
+
+# Programs
+CXX = @CXX@
+
+# Get objects and dependencies from sources.
+OBJS = $(CC_SRCS:%.cpp=%.o)
+DEPS = $(CC_SRCS:%.cpp=.%.d)
+
+# Rules.
+all: ragel
+
+ragel: $(GEN_SRC) $(OBJS)
+ $(CXX) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
+
+ifeq ($(BUILD_PARSERS),true)
+
+rlparse.h: rlparse.kh
+ kelbt -o $@ $<
+
+rlparse.cpp: rlparse.kl rlparse.kh
+ kelbt -o $@ $<
+
+rlscan.cpp: rlscan.rl
+ ragel $< | rlcodegen -G2 -o $@
+
+endif
+
+%.o: %.cpp
+ @$(CXX) -M $(DEFS) $(INCS) $< > .$*.d
+ $(CXX) -c $(CFLAGS) $(DEFS) $(INCS) -o $@ $<
+
+distclean: clean
+ rm -f Makefile
+
+ifeq ($(BUILD_PARSERS),true)
+EXTRA_CLEAN = $(GEN_SRC)
+endif
+
+clean:
+ rm -f tags .*.d *.o ragel $(EXTRA_CLEAN)
+
+install: all
+ install -d $(PREFIX)/bin
+ install -s ragel $(PREFIX)/bin/ragel
+
+-include $(DEPS)
diff --git a/ragel/fsmap.cpp b/ragel/fsmap.cpp
new file mode 100644
index 0000000..551aea0
--- /dev/null
+++ b/ragel/fsmap.cpp
@@ -0,0 +1,840 @@
+/*
+ * Copyright 2002-2004 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "fsmgraph.h"
+#include <iostream>
+using std::cerr;
+using std::endl;
+
+CondData *condData = 0;
+KeyOps *keyOps = 0;
+
+/* Insert a single action into this action table, keyed by its ordering. */
+void ActionTable::setAction( int ordering, Action *action )
+{
+ /* Multi-insert in case specific instances of an action appear in a
+ * transition more than once. */
+ insertMulti( ordering, action );
+}
+
+/* Copy every (ordering, action) entry of another action table into this
+ * table. Entries are multi-inserted, so existing entries are kept. */
+void ActionTable::setActions( const ActionTable &other )
+{
+	ActionTable::Iter src = other;
+	while ( src.lte() ) {
+		insertMulti( src->key, src->value );
+		src++;
+	}
+}
+
+/* Insert nActs actions from two parallel arrays: actions[i] is inserted
+ * under the ordering orderings[i]. */
+void ActionTable::setActions( int *orderings, Action **actions, int nActs )
+{
+	int idx = 0;
+	while ( idx < nActs ) {
+		insertMulti( orderings[idx], actions[idx] );
+		idx += 1;
+	}
+}
+
+/* Return true if the given action appears anywhere in this table,
+ * regardless of the ordering it was inserted under. */
+bool ActionTable::hasAction( Action *action )
+{
+	const int count = length();
+	int idx = 0;
+	while ( idx < count ) {
+		if ( data[idx].value == action )
+			return true;
+		idx += 1;
+	}
+	return false;
+}
+
+/* Insert a single longest-match part into this table, keyed by its
+ * ordering. */
+void LmActionTable::setAction( int ordering, LongestMatchPart *action )
+{
+ /* Multi-insert in case specific instances of an action appear in a
+ * transition more than once. */
+ insertMulti( ordering, action );
+}
+
+/* Copy every entry of another longest-match action table into this table
+ * using multi-insert. */
+void LmActionTable::setActions( const LmActionTable &other )
+{
+	LmActionTable::Iter src = other;
+	while ( src.lte() ) {
+		insertMulti( src->key, src->value );
+		src++;
+	}
+}
+
+void ErrActionTable::setAction( int ordering, Action *action, int transferPoint )
+{
+ insertMulti( ErrActionTableEl( action, ordering, transferPoint ) );
+}
+
+void ErrActionTable::setActions( const ErrActionTable &other )
+{
+ for ( ErrActionTable::Iter act = other; act.lte(); act++ )
+ insertMulti( ErrActionTableEl( act->action, act->ordering, act->transferPoint ) );
+}
+
+/* Insert a priority into this table. When the insert is refused because an
+ * equivalent entry is already present, that entry is overwritten provided
+ * the new ordering is at least as large (i.e. not earlier in time). */
+void PriorTable::setPrior( int ordering, PriorDesc *desc )
+{
+	PriorEl *existing = 0;
+
+	/* A non-null result means the element went in and there is no clash. */
+	if ( insert( PriorEl(ordering, desc), &existing ) != 0 )
+		return;
+
+	/* Clash: keep whichever of the two has the later ordering. */
+	if ( ordering >= existing->ordering )
+		*existing = PriorEl( ordering, desc );
+}
+
+/* Merge all priorities of another table into this one. Each entry goes
+ * through setPrior so that duplicates are resolved by ordering. */
+void PriorTable::setPriors( const PriorTable &other )
+{
+	for ( PriorTable::Iter src = other; src.lte(); src++ )
+		setPrior( src->ordering, src->desc );
+}
+
+/* Set the priority of the starting transitions. The start state is isolated
+ * first so that it has no other entry points, then the priority is set on
+ * every transition out of the start state that has a target. Only existing
+ * transitions are touched: the out priority table of the start state is not
+ * modified here, even when the start state is final. */
+void FsmAp::startFsmPrior( int ordering, PriorDesc *prior )
+{
+	isolateStartState();
+
+	for ( TransList::Iter tr = startState->outList; tr.lte(); tr++ ) {
+		/* Transitions without a target state are left alone. */
+		if ( tr->toState == 0 )
+			continue;
+		tr->priorTable.setPrior( ordering, prior );
+	}
+}
+
+/* Set the priority on every transition in the graph that has a target
+ * state, by walking the out list of every state. */
+void FsmAp::allTransPrior( int ordering, PriorDesc *prior )
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) {
+			/* Transitions without a target state are left alone. */
+			if ( tr->toState == 0 )
+				continue;
+			tr->priorTable.setPrior( ordering, prior );
+		}
+	}
+}
+
+/* Set the priority of every transition that currently enters a final state.
+ * Transitions added into those states later are not affected: the graph has
+ * no notion of pending in-transition data comparable to what it keeps for
+ * pending out transitions. */
+void FsmAp::finishFsmPrior( int ordering, PriorDesc *prior )
+{
+	for ( StateSet::Iter fin = finStateSet; fin.lte(); fin++ ) {
+		TransInList::Iter in = (*fin)->inList;
+		while ( in.lte() ) {
+			in->priorTable.setPrior( ordering, prior );
+			in++;
+		}
+	}
+}
+
+/* Record a priority for transitions that may later be made out of this
+ * machine by storing it in the out priority table of every final state. */
+void FsmAp::leaveFsmPrior( int ordering, PriorDesc *prior )
+{
+	StateSet::Iter fin = finStateSet;
+	while ( fin.lte() ) {
+		(*fin)->outPriorTable.setPrior( ordering, prior );
+		fin++;
+	}
+}
+
+
+/* Set an action to execute on the starting transitions. The start state is
+ * isolated first so that it has no other entry points, then the action is
+ * added to every transition out of the start state that has a target. Only
+ * existing transitions are touched: the out action table of the start state
+ * is not modified here, even when the start state is final. */
+void FsmAp::startFsmAction( int ordering, Action *action )
+{
+	isolateStartState();
+
+	for ( TransList::Iter tr = startState->outList; tr.lte(); tr++ ) {
+		/* Transitions without a target state are left alone. */
+		if ( tr->toState == 0 )
+			continue;
+		tr->actionTable.setAction( ordering, action );
+	}
+}
+
+/* Set an action on every transition in the graph that has a target state,
+ * by walking the out list of every state. */
+void FsmAp::allTransAction( int ordering, Action *action )
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) {
+			/* Transitions without a target state are left alone. */
+			if ( tr->toState == 0 )
+				continue;
+			tr->actionTable.setAction( ordering, action );
+		}
+	}
+}
+
+/* Set an action to execute when a final state is entered. The action is put
+ * on the transitions that currently go into each final state, so it runs
+ * whenever a final state is entered from within this machine. A final start
+ * state cannot have an action for being "entered" the first time. */
+void FsmAp::finishFsmAction( int ordering, Action *action )
+{
+	for ( StateSet::Iter fin = finStateSet; fin.lte(); fin++ ) {
+		TransInList::Iter in = (*fin)->inList;
+		while ( in.lte() ) {
+			in->actionTable.setAction( ordering, action );
+			in++;
+		}
+	}
+}
+
+/* Record an action for transitions that may later be made out of this
+ * machine by storing it in the out action table of every final state. */
+void FsmAp::leaveFsmAction( int ordering, Action *action )
+{
+	StateSet::Iter fin = finStateSet;
+	while ( fin.lte() ) {
+		(*fin)->outActionTable.setAction( ordering, action );
+		fin++;
+	}
+}
+
+/* Used when constructing scanners: add a longest-match part to the
+ * longest-match action table of every transition that enters a final
+ * state. */
+void FsmAp::longMatchAction( int ordering, LongestMatchPart *lmPart )
+{
+	for ( StateSet::Iter fin = finStateSet; fin.lte(); fin++ ) {
+		TransInList::Iter in = (*fin)->inList;
+		while ( in.lte() ) {
+			in->lmActionTable.setAction( ordering, lmPart );
+			in++;
+		}
+	}
+}
+
+/* Make the out list of a state cover the whole key range from
+ * keyOps->minKey to keyOps->maxKey. Every range not covered by an existing
+ * transition gets a new transition with a null (0) target, which is how the
+ * rest of this file recognises a transition to the error state. The existing
+ * transitions are moved to a temporary list and appended back one at a time
+ * with the gap transitions attached in between.
+ * NOTE(review): this relies on the out list being ordered by key with
+ * non-overlapping ranges -- confirm against the TransList invariants. */
+void FsmAp::fillGaps( StateAp *state )
+{
+ if ( state->outList.length() == 0 ) {
+ /* No transitions at all: one error transition spans the whole range. */
+ attachNewTrans( state, 0, keyOps->minKey, keyOps->maxKey );
+ }
+ else {
+ /* Empty the out list into srcList so it can be rebuilt in order. */
+ TransList srcList;
+ srcList.transfer( state->outList );
+
+ /* Check for a gap at the beginning. */
+ TransList::Iter trans = srcList, next;
+ if ( keyOps->minKey < trans->lowKey ) {
+ /* The gap ends one key below the first transition. */
+ Key highKey = trans->lowKey;
+ highKey.decrement();
+
+ attachNewTrans( state, 0, keyOps->minKey, highKey );
+ }
+
+ /* Move the first transition back into the out list. The successor is
+ * fetched before the append (presumably because append relinks trans). */
+ next = trans.next();
+ state->outList.append( trans );
+
+ /* Keep the last high end. */
+ Key lastHigh = trans->highKey;
+
+ /* Loop each source range. */
+ for ( trans = next; trans.lte(); trans = next ) {
+ /* Make the next key following the last range. */
+ Key nextKey = lastHigh;
+ nextKey.increment();
+
+ /* Check for a gap from last up to here. */
+ if ( nextKey < trans->lowKey ) {
+ /* Make the high end of the range that fills the gap. */
+ Key highKey = trans->lowKey;
+ highKey.decrement();
+
+ attachNewTrans( state, 0, nextKey, highKey );
+ }
+
+ /* Move this transition back into the out list, again fetching its
+ * successor first. */
+ next = trans.next();
+ state->outList.append( trans );
+
+ /* Keep the last high end. */
+ lastHigh = trans->highKey;
+ }
+
+ /* Now check for a gap on the end to fill. */
+ if ( lastHigh < keyOps->maxKey ) {
+ /* The final gap starts one key above the last covered key. */
+ lastHigh.increment();
+
+ attachNewTrans( state, 0, lastHigh, keyOps->maxKey );
+ }
+ }
+}
+
+void FsmAp::setErrorAction( StateAp *state, int ordering, Action *action )
+{
+ /* Fill any gaps in the out list with an error transition. */
+ fillGaps( state );
+
+ /* Set error transitions in the transitions that go to error. */
+ for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+ if ( trans->toState == 0 )
+ trans->actionTable.setAction( ordering, action );
+ }
+}
+
+
+/* Give a target state for error transitions. */
+void FsmAp::setErrorTarget( StateAp *state, StateAp *target, int *orderings,
+ Action **actions, int nActs )
+{
+ /* Fill any gaps in the out list with an error transition. */
+ fillGaps( state );
+
+ /* Set error target in the transitions that go to error. */
+ for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+ if ( trans->toState == 0 ) {
+ /* The trans goes to error, redirect it. */
+ redirectErrorTrans( trans->fromState, target, trans );
+ trans->actionTable.setActions( orderings, actions, nActs );
+ }
+ }
+}
+
+void FsmAp::transferErrorActions( StateAp *state, int transferPoint )
+{
+ for ( int i = 0; i < state->errActionTable.length(); ) {
+ ErrActionTableEl *act = state->errActionTable.data + i;
+ if ( act->transferPoint == transferPoint ) {
+ /* Transfer the error action and remove it. */
+ setErrorAction( state, act->ordering, act->action );
+ state->errActionTable.vremove( i );
+ }
+ else {
+ /* Not transfering and deleting, skip over the item. */
+ i += 1;
+ }
+ }
+}
+
+/* Add a pending error action, tagged with transferPoint, to the error action
+ * table of the start state. The start state is isolated first. */
+void FsmAp::startErrorAction( int ordering, Action *action, int transferPoint )
+{
+ /* Make sure the start state has no other entry points. */
+ isolateStartState();
+
+ /* Add the action to the start state's error action table. */
+ startState->errActionTable.setAction( ordering, action, transferPoint );
+}
+
+/* Set error actions in all states where there is a transition out. */
+void FsmAp::allErrorAction( int ordering, Action *action, int transferPoint )
+{
+ /* Insert actions in the error action table of all states. */
+ for ( StateList::Iter state = stateList; state.lte(); state++ )
+ state->errActionTable.setAction( ordering, action, transferPoint );
+}
+
+/* Set error actions in final states. */
+void FsmAp::finalErrorAction( int ordering, Action *action, int transferPoint )
+{
+ /* Add the action to the error table of final states. */
+ for ( StateSet::Iter state = finStateSet; state.lte(); state++ )
+ (*state)->errActionTable.setAction( ordering, action, transferPoint );
+}
+
+void FsmAp::notStartErrorAction( int ordering, Action *action, int transferPoint )
+{
+ for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+ if ( state != startState )
+ state->errActionTable.setAction( ordering, action, transferPoint );
+ }
+}
+
+void FsmAp::notFinalErrorAction( int ordering, Action *action, int transferPoint )
+{
+ for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+ if ( ! state->isFinState() )
+ state->errActionTable.setAction( ordering, action, transferPoint );
+ }
+}
+
+/* Set error actions in the states that have transitions into a final state. */
+void FsmAp::middleErrorAction( int ordering, Action *action, int transferPoint )
+{
+ /* Isolate the start state in case it is reachable from in inside the
+ * machine, in which case we don't want it set. */
+ for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+ if ( state != startState && ! state->isFinState() )
+ state->errActionTable.setAction( ordering, action, transferPoint );
+ }
+}
+
+/* Add an EOF action to the EOF action table of the start state. The start
+ * state is isolated first. */
+void FsmAp::startEOFAction( int ordering, Action *action )
+{
+ /* Make sure the start state has no other entry points. */
+ isolateStartState();
+
+ /* Add the action to the start state's EOF action table. */
+ startState->eofActionTable.setAction( ordering, action );
+}
+
+/* Add an EOF action to the EOF action table of every state in the
+ * machine. */
+void FsmAp::allEOFAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		st->eofActionTable.setAction( ordering, action );
+		st++;
+	}
+}
+
+/* Add an EOF action to the EOF action table of every final state. */
+void FsmAp::finalEOFAction( int ordering, Action *action )
+{
+	StateSet::Iter fin = finStateSet;
+	while ( fin.lte() ) {
+		(*fin)->eofActionTable.setAction( ordering, action );
+		fin++;
+	}
+}
+
+/* Add an EOF action to every state except the start state. */
+void FsmAp::notStartEOFAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		if ( st != startState )
+			st->eofActionTable.setAction( ordering, action );
+		st++;
+	}
+}
+
+/* Add an EOF action to every state that is not final. */
+void FsmAp::notFinalEOFAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		if ( ! st->isFinState() )
+			st->eofActionTable.setAction( ordering, action );
+		st++;
+	}
+}
+
+/* Add an EOF action to every "middle" state, i.e. every state that is
+ * neither the start state nor a final state. */
+void FsmAp::middleEOFAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		if ( st != startState ) {
+			if ( ! st->isFinState() )
+				st->eofActionTable.setAction( ordering, action );
+		}
+		st++;
+	}
+}
+
+/*
+ * Set To State Actions.
+ */
+
+/* Add a to-state action to the to-state action table of the start state.
+ * The start state is isolated first. */
+void FsmAp::startToStateAction( int ordering, Action *action )
+{
+ /* Make sure the start state has no other entry points. */
+ isolateStartState();
+ startState->toStateActionTable.setAction( ordering, action );
+}
+
+/* Add a to-state action to every state in the machine. */
+void FsmAp::allToStateAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		st->toStateActionTable.setAction( ordering, action );
+		st++;
+	}
+}
+
+/* Add a to-state action to the to-state action table of every final
+ * state. */
+void FsmAp::finalToStateAction( int ordering, Action *action )
+{
+	StateSet::Iter fin = finStateSet;
+	while ( fin.lte() ) {
+		(*fin)->toStateActionTable.setAction( ordering, action );
+		fin++;
+	}
+}
+
+/* Add a to-state action to every state except the start state. */
+void FsmAp::notStartToStateAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		if ( st != startState )
+			st->toStateActionTable.setAction( ordering, action );
+		st++;
+	}
+}
+
+/* Add a to-state action to every state that is not final. */
+void FsmAp::notFinalToStateAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		if ( ! st->isFinState() )
+			st->toStateActionTable.setAction( ordering, action );
+		st++;
+	}
+}
+
+/* Add a to-state action to every "middle" state, i.e. every state that is
+ * neither the start state nor a final state. */
+void FsmAp::middleToStateAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		if ( st != startState ) {
+			if ( ! st->isFinState() )
+				st->toStateActionTable.setAction( ordering, action );
+		}
+		st++;
+	}
+}
+
+/*
+ * Set From State Actions.
+ */
+
+/* Add a from-state action to the from-state action table of the start
+ * state. The start state is isolated first. */
+void FsmAp::startFromStateAction( int ordering, Action *action )
+{
+ /* Make sure the start state has no other entry points. */
+ isolateStartState();
+ startState->fromStateActionTable.setAction( ordering, action );
+}
+
+/* Add a from-state action to every state in the machine. */
+void FsmAp::allFromStateAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		st->fromStateActionTable.setAction( ordering, action );
+		st++;
+	}
+}
+
+/* Add a from-state action to the from-state action table of every final
+ * state. */
+void FsmAp::finalFromStateAction( int ordering, Action *action )
+{
+	StateSet::Iter fin = finStateSet;
+	while ( fin.lte() ) {
+		(*fin)->fromStateActionTable.setAction( ordering, action );
+		fin++;
+	}
+}
+
+/* Add a from-state action to every state except the start state. */
+void FsmAp::notStartFromStateAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		if ( st != startState )
+			st->fromStateActionTable.setAction( ordering, action );
+		st++;
+	}
+}
+
+/* Add a from-state action to every state that is not final. */
+void FsmAp::notFinalFromStateAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		if ( ! st->isFinState() )
+			st->fromStateActionTable.setAction( ordering, action );
+		st++;
+	}
+}
+
+/* Add a from-state action to every "middle" state, i.e. every state that is
+ * neither the start state nor a final state. */
+void FsmAp::middleFromStateAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		if ( st != startState ) {
+			if ( ! st->isFinState() )
+				st->fromStateActionTable.setAction( ordering, action );
+		}
+		st++;
+	}
+}
+
+/* Renumber the action orderings on the transitions out of the start state.
+ * On each transition the actions are given the keys fromOrder, fromOrder+1,
+ * ... in their existing table order. Useful before applying the star
+ * operator. Returns the largest number of orderings used by any single
+ * transition. */
+int FsmAp::shiftStartActionOrder( int fromOrder )
+{
+	int maxUsed = 0;
+
+	for ( TransList::Iter tr = startState->outList; tr.lte(); tr++ ) {
+		/* Number of orderings handed out on this transition so far. */
+		int used = 0;
+
+		ActionTable::Iter act = tr->actionTable;
+		while ( act.lte() ) {
+			act->key = fromOrder + used;
+			used += 1;
+			act++;
+		}
+
+		if ( maxUsed < used )
+			maxUsed = used;
+	}
+
+	return maxUsed;
+}
+
+/* Drop all priority data from the machine: the out priority table of every
+ * state and the priority table of every transition in every out list. */
+void FsmAp::clearAllPriorities()
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		/* Pending out priorities held by the state itself. */
+		st->outPriorTable.empty();
+
+		/* Priorities on the state's out transitions. */
+		TransList::Iter tr = st->outList;
+		while ( tr.lte() ) {
+			tr->priorTable.empty();
+			tr++;
+		}
+	}
+}
+
+/* Set every action ordering key in the machine to zero. This may be called
+ * before minimization once it is known that no further fsm operations will
+ * be performed; states are then no longer kept apart merely because their
+ * action orderings differ, which allows greater reduction. */
+void FsmAp::nullActionKeys( )
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		/* Per-transition tables. */
+		for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) {
+			/* Regular transition actions. */
+			ActionTable::Iter act = tr->actionTable;
+			while ( act.lte() ) {
+				act->key = 0;
+				act++;
+			}
+
+			/* Longest-match actions. */
+			LmActionTable::Iter lmAct = tr->lmActionTable;
+			while ( lmAct.lte() ) {
+				lmAct->key = 0;
+				lmAct++;
+			}
+		}
+
+		/* To-state actions. */
+		ActionTable::Iter toAct = st->toStateActionTable;
+		while ( toAct.lte() ) {
+			toAct->key = 0;
+			toAct++;
+		}
+
+		/* From-state actions. */
+		ActionTable::Iter fromAct = st->fromStateActionTable;
+		while ( fromAct.lte() ) {
+			fromAct->key = 0;
+			fromAct++;
+		}
+
+		/* Pending actions for out transitions. */
+		ActionTable::Iter outAct = st->outActionTable;
+		while ( outAct.lte() ) {
+			outAct->key = 0;
+			outAct++;
+		}
+
+		/* Error actions keep their ordering in a separate field. */
+		ErrActionTable::Iter errAct = st->errActionTable;
+		while ( errAct.lte() ) {
+			errAct->ordering = 0;
+			errAct++;
+		}
+
+		/* EOF actions. */
+		ActionTable::Iter eofAct = st->eofActionTable;
+		while ( eofAct.lte() ) {
+			eofAct->key = 0;
+			eofAct++;
+		}
+	}
+}
+
+/* Sanity check run over stateList. Asserts that states which are not final
+ * carry no leaving (out) data, that the SB_BOTH bits of stateBits are clear,
+ * and that every state has at least one foreign in transition. */
+void FsmAp::verifyStates()
+{
+    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+        /* Out actions, conditions and priorities belong to final states only. */
+        if ( ( st->stateBits & SB_ISFINAL ) == 0 ) {
+            assert( st->outActionTable.length() == 0 );
+            assert( st->outCondSet.length() == 0 );
+            assert( st->outPriorTable.length() == 0 );
+        }
+
+        /* Bits used as working data by algorithms must have been cleared. */
+        assert( (st->stateBits & SB_BOTH) == 0 );
+        assert( st->foreignInTrans > 0 );
+    }
+}
+
+/* Decide the relative priority of two transitions from their priority
+ * tables. Only keys present in both tables are compared; the first such key
+ * with unequal priorities decides. Returns -1, 1, or 0 for no difference. */
+int FsmAp::comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 )
+{
+    /* Scan both tables together (presumes each is ordered by key). */
+    PriorTable::Iter left = priorTable1;
+    PriorTable::Iter right = priorTable2;
+    while ( left.lte() && right.lte() ) {
+        /* Keys differ: advance the side holding the smaller key. */
+        if ( left->desc->key < right->desc->key ) {
+            left.increment();
+            continue;
+        }
+        if ( left->desc->key > right->desc->key ) {
+            right.increment();
+            continue;
+        }
+        /* Same key on both sides: unequal priorities settle the result. */
+        if ( left->desc->priority < right->desc->priority )
+            return -1;
+        if ( left->desc->priority > right->desc->priority )
+            return 1;
+        left.increment();
+        right.increment();
+    }
+
+    /* No shared key had differing priorities. */
+    return 0;
+}
+
+/* Order two transitions by the data users have embedded in them: first the
+ * priority tables, then the longest-match action tables, then the action
+ * tables. The from and to states are not consulted. Both pointers are
+ * dereferenced unconditionally and so must not be null. Returns the first
+ * nonzero comparison result, or the result of the last comparison. */
+int FsmAp::compareTransData( TransAp *trans1, TransAp *trans2 )
+{
+    /* Priorities are looked at first. */
+    int priorCmp = CmpPriorTable::compare( trans1->priorTable,
+            trans2->priorTable );
+    if ( priorCmp != 0 )
+        return priorCmp;
+
+    /* Next the longest-match action tables. */
+    int lmCmp = CmpLmActionTable::compare(trans1->lmActionTable,
+            trans2->lmActionTable);
+    if ( lmCmp != 0 )
+        return lmCmp;
+
+    /* The action tables have the final say. */
+    return CmpActionTable::compare(trans1->actionTable,
+            trans2->actionTable);
+}
+
+/* Hook called whenever srcTrans (which may be destTrans itself) is folded
+ * into destTrans during merging. It is called once for every transition that
+ * a new or combined transition is to represent, and draws the properties of
+ * srcTrans into destTrans: longest-match actions, actions and, for a distinct
+ * source, priorities. */
+void FsmAp::addInTrans( TransAp *destTrans, TransAp *srcTrans )
+{
+    if ( srcTrans != destTrans ) {
+        /* A different transition: take its functions and priorities. */
+        destTrans->lmActionTable.setActions( srcTrans->lmActionTable );
+        destTrans->actionTable.setActions( srcTrans->actionTable );
+        destTrans->priorTable.setPriors( srcTrans->priorTable );
+    }
+    else {
+        /* Adding in ourselves: work from copies of our own tables. The
+         * priorities are not copied in as that would have no effect. */
+        LmActionTable lmCopy( srcTrans->lmActionTable );
+        destTrans->lmActionTable.setActions( lmCopy );
+        ActionTable actCopy( srcTrans->actionTable );
+        destTrans->actionTable.setActions( actCopy );
+    }
+}
+
+/* Order two states by the data users have embedded in them. The tables are
+ * compared in a fixed sequence and the first nonzero result is returned. */
+int FsmAp::compareStateData( const StateAp *state1, const StateAp *state2 )
+{
+    /* Out priority tables. */
+    int priorCmp = CmpPriorTable::
+            compare( state1->outPriorTable, state2->outPriorTable );
+    if ( priorCmp != 0 )
+        return priorCmp;
+
+    /* To-state action tables. */
+    int toCmp = CmpActionTable::compare( state1->toStateActionTable,
+            state2->toStateActionTable );
+    if ( toCmp != 0 )
+        return toCmp;
+
+    /* From-state action tables. */
+    int fromCmp = CmpActionTable::compare( state1->fromStateActionTable,
+            state2->fromStateActionTable );
+    if ( fromCmp != 0 )
+        return fromCmp;
+
+    /* Out action tables. */
+    int outCmp = CmpActionTable::compare( state1->outActionTable,
+            state2->outActionTable );
+    if ( outCmp != 0 )
+        return outCmp;
+
+    /* Out condition sets. */
+    int condCmp = CmpActionSet::compare( state1->outCondSet,
+            state2->outCondSet );
+    if ( condCmp != 0 )
+        return condCmp;
+
+    /* Error action tables. */
+    int errCmp = CmpErrActionTable::compare( state1->errActionTable,
+            state2->errActionTable );
+    if ( errCmp != 0 )
+        return errCmp;
+
+    /* Eof action tables decide when everything before them was equal. */
+    return CmpActionTable::compare( state1->eofActionTable,
+            state2->eofActionTable );
+}
+
+
+/* Called when a state loses its final state status, at which point the data
+ * embedded for its leaving transitions should be deleted. */
+void FsmAp::clearOutData( StateAp *state )
+{
+    /* Discard the out priorities, out conditions and out actions. */
+    state->outPriorTable.empty();
+    state->outCondSet.empty();
+    state->outActionTable.empty();
+}
+
+bool FsmAp::hasOutData( StateAp *state )
+{
+    /* Any non-empty out table or out condition set counts as out data. */
+    return !( state->outActionTable.length() <= 0 && state->outCondSet.length() <= 0 &&
+            state->outPriorTable.length() <= 0 );
+}
+
+/*
+ * Setting Conditions.
+ */
+
+
+void logNewExpansion( Expansion *exp );
+void logCondSpace( CondSpace *condSpace );
+
+CondSpace *FsmAp::addCondSpace( const CondSet &condSet )
+{
+    CondSpace *existing = condData->condSpaceMap.find( condSet );
+    if ( existing != 0 )
+        return existing;
+
+    /* Not registered yet: claim the next block of keys and add the space. */
+    Key baseKey = condData->nextCondKey;
+    condData->nextCondKey += (1 << condSet.length() ) * keyOps->alphSize();
+    CondSpace *condSpace = new CondSpace( condSet );
+    condSpace->baseKey = baseKey;
+    condData->condSpaceMap.insert( condSpace );
+    #ifdef LOG_CONDS
+    cerr << "adding new condition space" << endl;
+    cerr << " condition set: ";
+    logCondSpace( condSpace );
+    cerr << endl;
+    cerr << " baseKey: " << baseKey.getVal() << endl;
+    #endif
+    return condSpace;
+}
+
+void FsmAp::startFsmCondition( Action *condAction )
+{
+ /* Isolate the start state so it has no other entry points, then embed. */
+ isolateStartState();
+ embedCondition( startState, condAction );
+}
+
+void FsmAp::allTransCondition( Action *condAction )
+{ /* Embed the condition in each state on the main state list. */
+    for ( StateList::Iter st = stateList; st.lte(); st++ )
+        embedCondition( st, condAction );
+}
+
+void FsmAp::leaveFsmCondition( Action *condAction )
+{ /* Add the condition to the out condition set of each final state. */
+    for ( StateSet::Iter fin = finStateSet; fin.lte(); fin++ )
+        (*fin)->outCondSet.insert( condAction );
+}
diff --git a/ragel/fsmattach.cpp b/ragel/fsmattach.cpp
new file mode 100644
index 0000000..6a90df6
--- /dev/null
+++ b/ragel/fsmattach.cpp
@@ -0,0 +1,425 @@
+/*
+ * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <string.h>
+#include <assert.h>
+#include "fsmgraph.h"
+
+#include <iostream>
+using namespace std;
+
+/* Insert trans at the front of the in-list whose head pointer is supplied
+ * and account for the new arrival at `to`. A transition with from == to is
+ * not foreign and leaves to's foreignInTrans count alone. */
+void FsmAp::attachToInList( StateAp *from, StateAp *to,
+        TransAp *&head, TransAp *trans )
+{
+    /* The new element goes first, so nothing precedes it. */
+    trans->ilnext = head;
+    trans->ilprev = 0;
+
+    if ( head != 0 )
+        head->ilprev = trans;
+
+    /* Now insert ourselves at the front of the list. */
+    head = trans;
+
+    /* Keep track of foreign transitions for from and to. */
+    if ( from != to ) {
+        /* With misfit accounting on, a state about to gain its first foreign
+         * in transition moves from the misfit list to the main list. */
+        if ( misfitAccounting && to->foreignInTrans == 0 )
+            stateList.append( misfitList.detach( to ) );
+
+        to->foreignInTrans += 1;
+    }
+}
+
+/* Unlink trans from the doubly linked in-list whose head pointer is supplied
+ * and account for the lost arrival at `to`. */
+void FsmAp::detachFromInList( StateAp *from, StateAp *to,
+        TransAp *&head, TransAp *trans )
+{
+    TransAp *before = trans->ilprev;
+    TransAp *after = trans->ilnext;
+    /* Bridge the neighbours over trans; the head moves when trans was first. */
+    if ( before != 0 )
+        before->ilnext = after;
+    else
+        head = after;
+    if ( after != 0 )
+        after->ilprev = before;
+
+    /* A transition with from == to is not foreign, the count is untouched. */
+    if ( from == to )
+        return;
+
+    /* One less foreign in transition. With misfit accounting on, a state left
+     * with none moves from the main list to the misfit list. */
+    to->foreignInTrans -= 1;
+    if ( misfitAccounting && to->foreignInTrans == 0 )
+        misfitList.append( stateList.detach( to ) );
+}
+
+/* Allocate a transition from `from` to `to` covering lowKey..highKey, append
+ * it to from's out list and, when `to` is non-null, link it into to's in
+ * list. With a null `to` no in-list link is made. Returns the transition. */
+TransAp *FsmAp::attachNewTrans( StateAp *from, StateAp *to, Key lowKey, Key highKey )
+{
+    TransAp *newTrans = new TransAp();
+
+    /* Fill in the key range and remember the parties involved. */
+    newTrans->lowKey = lowKey;
+    newTrans->highKey = highKey;
+    newTrans->fromState = from;
+    newTrans->toState = to;
+
+    /* The new transition goes at the end of the source state's out list. */
+    from->outList.append( newTrans );
+
+    /* A null target has no in list to join. */
+    if ( to == 0 )
+        return newTrans;
+
+    /* Attach using to's inList head as the head pointer. */
+    attachToInList( from, to, to->inList.head, newTrans );
+
+    return newTrans;
+}
+
+/* Connect an already allocated, currently unattached transition so that it
+ * runs from `from` to `to`. Adding the transition to an out list is not done
+ * here. A null `to` records the endpoints without making an in-list link. */
+void FsmAp::attachTrans( StateAp *from, StateAp *to, TransAp *trans )
+{
+    /* Both endpoints must still be unset. */
+    assert( trans->fromState == 0 && trans->toState == 0 );
+
+    trans->toState = to;
+    trans->fromState = from;
+
+    if ( to != 0 )
+        attachToInList( from, to, to->inList.head, trans );
+}
+
+/* Redirect a transition away from error and towards some state. This is just
+ * like attachTrans except it requires fromState to be set and does not touch
+ * it. */
+void FsmAp::redirectErrorTrans( StateAp *from, StateAp *to, TransAp *trans )
+{
+ assert( trans->fromState != 0 && trans->toState == 0 );
+ trans->toState = to;
+
+ if ( to != 0 ) {
+ /* Attach using the inList pointer as the head pointer. */
+ attachToInList( from, to, to->inList.head, trans );
+ }
+}
+
+/* Detach trans, which must currently run from `from` to `to`. Both endpoint
+ * pointers are cleared and, for a non-null `to`, trans leaves to's in list. */
+void FsmAp::detachTrans( StateAp *from, StateAp *to, TransAp *trans )
+{
+    assert( trans->fromState == from && trans->toState == to );
+    trans->toState = 0;
+    trans->fromState = 0;
+
+    /* Only a real target has an in list to be removed from. */
+    if ( to != 0 )
+        detachFromInList( from, to, to->inList.head, trans );
+}
+
+
+/* Detach a state from the graph. Detaches and deletes the transitions into
+ * and out of the state, unsets its entry points and removes it from the final
+ * state set. The state itself is not deleted here. */
+void FsmAp::detachState( StateAp *state )
+{
+ /* Drain the in list: each pass removes the transition at its head. */
+ while ( state->inList.head != 0 ) {
+ /* Get pointers to the transition and to the state it comes from. */
+ TransAp *trans = state->inList.head;
+ StateAp *fromState = trans->fromState;
+
+ /* Clear its endpoints and unlink it from this state's in list. */
+ detachTrans( fromState, state, trans );
+
+ /* Take it out of the source's out list, then it is ok to delete. */
+ fromState->outList.detach( trans );
+ delete trans;
+ }
+
+ /* Remove the entry points into this state, one id at a time. */
+ while ( state->entryIds.length() > 0 )
+ unsetEntry( state->entryIds[0], state );
+
+ /* Detach and delete the out transitions, saving next before each delete. */
+ for ( TransList::Iter trans = state->outList; trans.lte(); ) {
+ TransList::Iter next = trans.next();
+ detachTrans( state, trans->toState, trans );
+ delete trans;
+ trans = next;
+ }
+
+ /* The transitions were deleted above; abandon() presumably just forgets them. */
+ state->outList.abandon();
+
+ /* Take the state out of the final state set if it is marked final. */
+ if ( state->stateBits & SB_ISFINAL )
+ finStateSet.remove( state );
+}
+
+
+/* Duplicate a transition. The copy runs from `from` to the same destination
+ * as srcTrans and takes its functions and priorities from srcTrans. Used for
+ * merging a transition into a free spot, where it can simply be dropped in
+ * because it conflicts with no existing transition and need not be crossed.
+ * Keys and out-list membership are left to the caller. Returns the copy. */
+TransAp *FsmAp::dupTrans( StateAp *from, TransAp *srcTrans )
+{
+    /* Start from a fresh, unattached transition. */
+    TransAp *copy = new TransAp();
+
+    /* Point it at the same target as the source transition. */
+    attachTrans( from, srcTrans->toState, copy );
+
+    /* Draw in the data of the source through the add-in callback. */
+    addInTrans( copy, srcTrans );
+
+    return copy;
+}
+
+/* In crossing, src trans and dest trans both go to existing states. Make
+ * dest trans go to one state that stands for the targets of both. */
+TransAp *FsmAp::fsmAttachStates( MergeData &md, StateAp *from,
+ TransAp *destTrans, TransAp *srcTrans )
+{
+ /* The priorities are equal, so the transitions must be merged. Does the
+ * existing trans already go to the state we are to attach to? ie, are
+ * we simply to double up the transition? */
+ StateAp *toState = srcTrans->toState;
+ StateAp *existingState = destTrans->toState;
+
+ if ( existingState == toState ) {
+ /* The transition is a double up to the same state. Add the src
+ * trans into the dest trans. We don't need to merge in the from out
+ * trans data, that was done already. */
+ addInTrans( destTrans, srcTrans );
+ }
+ else {
+ /* The trans is not a double up. Dest trans cannot be the same as src
+ * trans. Build the set of states the merged trans stands for. */
+ StateSet stateSet;
+
+ /* We go to all the states the existing trans goes to, plus... */
+ if ( existingState->stateDictEl == 0 )
+ stateSet.insert( existingState );
+ else
+ stateSet.insert( existingState->stateDictEl->stateSet );
+
+ /* ... all the states that we have been told to go to. */
+ if ( toState->stateDictEl == 0 )
+ stateSet.insert( toState );
+ else
+ stateSet.insert( toState->stateDictEl->stateSet );
+
+ /* Look for the set in the dictionary. If it is new, make its state. */
+ StateDictEl *lastFound;
+ if ( md.stateDict.insert( stateSet, &lastFound ) ) {
+ /* Make a new state representing the combination of states in
+ * stateSet. It gets added to the fill list. This means that we
+ * need to fill in its transitions sometime in the future. We
+ * don't do that now (ie, do not recurse). */
+ StateAp *combinState = addState();
+
+ /* Link up the dict element and the state. */
+ lastFound->targState = combinState;
+ combinState->stateDictEl = lastFound;
+
+ /* Add to the fill list. */
+ md.fillListAppend( combinState );
+ }
+
+ /* Get the state that was just inserted or was already there. */
+ StateAp *targ = lastFound->targState;
+
+ /* Detach the dest trans from the state it used to go to. */
+ detachTrans( from, existingState, destTrans );
+
+ /* Re-attach to the new target. */
+ attachTrans( from, targ, destTrans );
+
+ /* Add in src trans to the existing transition that we redirected to
+ * the new state. We don't need to merge in the from out trans data,
+ * that was done already. */
+ addInTrans( destTrans, srcTrans );
+ }
+
+ return destTrans;
+}
+
+/* Merge srcTrans into destTrans, two transitions that are to be crossed,
+ * taking care of either one going to the error state (a null toState). In
+ * the cases handled here the result is destTrans itself, redirected where
+ * necessary; when both go somewhere the result is whatever fsmAttachStates
+ * returns. */
+TransAp *FsmAp::mergeTrans( MergeData &md, StateAp *from,
+        TransAp *destTrans, TransAp *srcTrans )
+{
+    bool destLive = destTrans->toState != 0;
+    bool srcLive = srcTrans->toState != 0;
+
+    /* Both go somewhere: run the actual cross, which works out the state
+     * that dest is to go to. */
+    if ( destLive && srcLive )
+        return fsmAttachStates( md, from, destTrans, srcTrans );
+
+    if ( !destLive && srcLive ) {
+        /* Non error added into error: dest must be detached and then
+         * reattached so that it goes where src goes. */
+        detachTrans( from, destTrans->toState, destTrans );
+        attachTrans( from, srcTrans->toState, destTrans );
+    }
+
+    /* The target of dest is now settled: both were error, or src was error
+     * and dest keeps its target, or dest was redirected just above. What
+     * remains is to add src's data into dest. */
+    addInTrans( destTrans, srcTrans );
+
+    return destTrans;
+}
+
+/* Cross two transitions according to priority. If src has the lower priority
+ * it is ignored and dest is returned unchanged. If src has the higher
+ * priority it overwrites dest: dest is detached and a copy of src is
+ * returned. If the priorities are equal the two are merged. The return value
+ * is the transition that resulted from the cross. */
+TransAp *FsmAp::crossTransitions( MergeData &md, StateAp *from,
+        TransAp *destTrans, TransAp *srcTrans )
+{
+    /* Compare dest against src: a negative result means dest has the lower
+     * priority, a positive result means dest has the higher priority. */
+    int order = comparePrior( destTrans->priorTable, srcTrans->priorTable );
+
+    /* The dest trans has a higher priority, use dest. */
+    if ( order > 0 )
+        return destTrans;
+
+    /* Src trans and dest trans have the same priority, they must be merged. */
+    if ( order == 0 )
+        return mergeTrans( md, from, destTrans, srcTrans );
+
+    /* Src trans has a higher priority than dest, src overwrites dest. Detach
+     * dest and return a copy of src.
+     * NOTE(review): destTrans is detached here but not deleted; confirm that
+     * the caller reclaims it, otherwise it looks like it is leaked. */
+    detachTrans( from, destTrans->toState, destTrans );
+
+    return dupTrans( from, srcTrans );
+}
+
+/* Copy the transitions in srcList into the out list of dest. The srcList must
+ * not be the outList of dest, otherwise the contents of srcList would be
+ * copied into itself while it is being iterated: bad news. */
+void FsmAp::outTransCopy( MergeData &md, StateAp *dest, TransAp *srcList )
+{
+ /* The new out list for dest is built up here. */
+ TransList destList;
+
+ /* Walk dest's out list (s1) and srcList (s2) together, range by range. */
+ PairIter<TransAp> outPair( dest->outList.head, srcList );
+ for ( ; !outPair.end(); outPair++ ) {
+ switch ( outPair.userState ) {
+ case RangeInS1: {
+ /* Only dest covers this range. The pair iter is the authority on
+ * the keys, it may have needed to break the dest range. */
+ TransAp *destTrans = outPair.s1Tel.trans;
+ destTrans->lowKey = outPair.s1Tel.lowKey;
+ destTrans->highKey = outPair.s1Tel.highKey;
+ destList.append( destTrans );
+ break;
+ }
+ case RangeInS2: {
+ /* Only src covers this range: duplicate src's transition for dest. */
+ TransAp *newTrans = dupTrans( dest, outPair.s2Tel.trans );
+
+ /* Set up the transition's keys and append to the dest list. */
+ newTrans->lowKey = outPair.s2Tel.lowKey;
+ newTrans->highKey = outPair.s2Tel.highKey;
+ destList.append( newTrans );
+ break;
+ }
+ case RangeOverlap: {
+ /* Exact overlap, cross them. */
+ TransAp *newTrans = crossTransitions( md, dest,
+ outPair.s1Tel.trans, outPair.s2Tel.trans );
+
+ /* Set up the transition's keys and append to the dest list. */
+ newTrans->lowKey = outPair.s1Tel.lowKey;
+ newTrans->highKey = outPair.s1Tel.highKey;
+ destList.append( newTrans );
+ break;
+ }
+ case BreakS1: {
+ /* Since we are always writing to the dest trans, the dest needs
+ * to be copied when it is broken. The copy goes into the first
+ * half of the break to "break it off". */
+ outPair.s1Tel.trans = dupTrans( dest, outPair.s1Tel.trans );
+ break;
+ }
+ case BreakS2:
+ break;
+ }
+ }
+
+ /* Abandon the old outList and transfer destList into it. */
+ dest->outList.transfer( destList );
+}
+
+
+/* Move the start designation, entry points and in transitions of src to dest. */
+void FsmAp::inTransMove( StateAp *dest, StateAp *src )
+{
+    /* Do not try to move in trans to and from the same state. */
+    assert( dest != src );
+
+    /* If src is the start state, dest becomes the start state. */
+    if ( src == startState ) {
+        unsetStartState();
+        setStartState( dest );
+    }
+
+    /* Retarget each entry point into src so that it leads into dest instead.
+     * changeEntry removes the id from src->entryIds, so the set must not be
+     * iterated while it shrinks; take the first id until none are left. */
+    while ( src->entryIds.length() > 0 )
+        changeEntry( src->entryIds[0], dest, src );
+
+    /* Move the transitions in inList, always working on the current head. */
+    while ( src->inList.head != 0 ) {
+        TransAp *trans = src->inList.head;
+        StateAp *fromState = trans->fromState;
+
+        /* Detach from src, reattach to dest. */
+        detachTrans( fromState, src, trans );
+        attachTrans( fromState, dest, trans );
+    }
+}
diff --git a/ragel/fsmbase.cpp b/ragel/fsmbase.cpp
new file mode 100644
index 0000000..16841d0
--- /dev/null
+++ b/ragel/fsmbase.cpp
@@ -0,0 +1,485 @@
+/*
+ * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <string.h>
+#include <assert.h>
+#include "fsmgraph.h"
+
+/* Append a state to the fill list, a singly linked list threaded through
+ * alg.next with head stfillHead and tail stfillTail. The new state always
+ * goes to the end of the list. */
+void MergeData::fillListAppend( StateAp *state )
+{
+    /* The new element is last, so nothing follows it. */
+    state->alg.next = 0;
+
+    /* Hook it in after the current tail, or make it the head when empty. */
+    if ( stfillHead != 0 )
+        stfillTail->alg.next = state;
+    else
+        stfillHead = state;
+
+    /* Either way the new state is now the tail. */
+    stfillTail = state;
+}
+
+/* Graph constructor. Makes a graph with no start state. */
+FsmAp::FsmAp()
+:
+ /* No start state. */
+ startState(0),
+
+ /* Misfit accounting is a switch, turned on only at specific times. It
+ * controls what happens when states have no way in from the outside
+ * world. It starts out off. */
+ misfitAccounting(false)
+{
+}
+
+/* Copy all graph data including transitions. */
+FsmAp::FsmAp( const FsmAp &graph )
+:
+    /* Lists start empty. Will be filled by copy. */
+    stateList(),
+    misfitList(),
+
+    /* Copy in the entry points and the start state. For now both still
+     * point at states of the source graph; they are resolved below. */
+    entryPoints(graph.entryPoints),
+    startState(graph.startState),
+
+    /* Will be filled by copy. */
+    finStateSet(),
+
+    /* Misfit accounting is only on during merging. */
+    misfitAccounting(false)
+{
+    /* Create the states and record their map in the original state. */
+    StateList::Iter origState = graph.stateList;
+    for ( ; origState.lte(); origState++ ) {
+        /* Make the new state and add it to the list. */
+        StateAp *newState = new StateAp( *origState );
+        stateList.append( newState );
+
+        /* Leave a pointer to the duplicate in the original's alg.stateMap. */
+        origState->alg.stateMap = newState;
+    }
+
+    /* Dereference all the state maps. */
+    for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+        for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+            /* The transition still points to the original target in the src
+             * machine. The target's duplicate is in its stateMap. */
+            StateAp *toState = trans->toState != 0 ? trans->toState->alg.stateMap : 0;
+
+            /* Attach the transition to the duplicate. */
+            trans->toState = 0;
+            attachTrans( state, toState, trans );
+        }
+    }
+
+    /* Fix the state pointers in the entry points array. */
+    EntryMapEl *eel = entryPoints.data;
+    for ( int e = 0; e < entryPoints.length(); e++, eel++ ) {
+        /* Get the duplicate of the state. */
+        eel->value = eel->value->alg.stateMap;
+
+        /* Foreign in transitions must be built up when duping machines so
+         * increment it here. */
+        eel->value->foreignInTrans += 1;
+    }
+
+    /* Fix the start state pointer and the new start state's count of in
+     * transitions. A source graph may have no start state to fix up. */
+    if ( startState != 0 ) {
+        startState = startState->alg.stateMap;
+        startState->foreignInTrans += 1;
+    }
+
+    /* Build the final state set. */
+    StateSet::Iter st = graph.finStateSet;
+    for ( ; st.lte(); st++ )
+        finStateSet.insert((*st)->alg.stateMap);
+}
+
+/* Tear down the graph: transition data goes first, then the states. */
+FsmAp::~FsmAp()
+{
+    /* Empty the out list of every state on the main state list. */
+    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+        /* This deletes the state's out transitions. */
+        st->outList.empty();
+    }
+
+    /* With the transitions gone, delete all the states. */
+    stateList.empty();
+}
+
+/* Make a state final. The SB_ISFINAL bit is set in its stateBits and the
+ * state is added to finStateSet. A state that is already final is left as
+ * it is. */
+void FsmAp::setFinState( StateAp *state )
+{
+    /* Only a state that is not yet final needs any work. */
+    if ( (state->stateBits & SB_ISFINAL) == 0 ) {
+        state->stateBits |= SB_ISFINAL;
+        finStateSet.insert( state );
+    }
+}
+
+/* Make a state non-final. Its out data is cleared, the SB_ISFINAL bit is
+ * cleared in its stateBits and the state is removed from the final state
+ * set. A state that is already non-final is left as it is. */
+void FsmAp::unsetFinState( StateAp *state )
+{
+    /* Only a state that is currently final needs any work. */
+    if ( state->stateBits & SB_ISFINAL ) {
+        /* When a state loses its final state status it must relinquish all
+         * the properties that are allowed only for final states. */
+        clearOutData( state );
+
+        state->stateBits &= ~ SB_ISFINAL;
+        finStateSet.remove( state );
+    }
+}
+
+/* Make state the start state. There must be no start state at the time of
+ * the call. Being the start state counts as one foreign in transition. */
+void FsmAp::setStartState( StateAp *state )
+{
+    /* Should change from unset to set. */
+    assert( startState == 0 );
+    startState = state;
+
+    /* With misfit accounting on, a state about to gain its first foreign in
+     * transition moves from the misfit list to the main list. */
+    if ( misfitAccounting && state->foreignInTrans == 0 )
+        stateList.append( misfitList.detach( state ) );
+
+    /* Up the foreign in transitions to the state. This is done last so that
+     * the check above sees the count as it was before. */
+    state->foreignInTrans += 1;
+}
+
+void FsmAp::unsetStartState()
+{
+    /* Should change from set to unset. */
+    assert( startState != 0 );
+    StateAp *prevStart = startState;
+
+    /* Being the start state counted as one foreign way in; take it back. */
+    prevStart->foreignInTrans -= 1;
+
+    /* With misfit accounting on, a state left with no foreign in
+     * transitions moves from the main list to the misfit list. */
+    if ( misfitAccounting && prevStart->foreignInTrans == 0 )
+        misfitList.append( stateList.detach( prevStart ) );
+
+    /* The graph now has no start state. */
+    startState = 0;
+}
+
+/* Associate an id with a state, making the state a named entry point. Has
+ * no effect if the state already carries the id. Each new entry point counts
+ * as one foreign in transition to the state. */
+void FsmAp::setEntry( int id, StateAp *state )
+{
+    /* Insert the id into the state. If the state is already labelled with
+     * id, there is nothing to do. */
+    if ( ! state->entryIds.insert( id ) )
+        return;
+
+    /* Record the entry point in the graph's entry map as well. */
+    entryPoints.insertMulti( id, state );
+
+    /* With misfit accounting on, a state about to gain its first foreign in
+     * transition moves from the misfit list to the main list. */
+    if ( misfitAccounting && state->foreignInTrans == 0 )
+        stateList.append( misfitList.detach( state ) );
+
+    /* Up the foreign in transitions to the state. */
+    state->foreignInTrans += 1;
+}
+
+/* Remove the association of an id with a state. The state loses its entry
+ * point status for id. The id must be mapped to state; this is asserted. */
+void FsmAp::unsetEntry( int id, StateAp *state )
+{
+    /* Find the run of entry map records for id. It must exist. */
+    EntryMapEl *enLow = 0, *enHigh = 0;
+    entryPoints.findMulti( id, enLow, enHigh );
+    assert( enLow != 0 && enHigh != 0 );
+
+    /* Walk the run to state's record without stepping past its end. */
+    while ( enLow->value != state ) {
+        assert( enLow < enHigh );
+        enLow += 1;
+    }
+
+    /* Remove the record from the map and the state's sense of the link. */
+    entryPoints.remove( enLow );
+    state->entryIds.remove( id );
+    state->foreignInTrans -= 1;
+    /* Misfit accounting: with no foreign in transitions left, move the state. */
+    if ( misfitAccounting && state->foreignInTrans == 0 )
+        misfitList.append( stateList.detach( state ) );
+}
+
+/* Remove all association of an id with states. The id must be mapped to at
+ * least one state; this is asserted. */
+void FsmAp::unsetEntry( int id )
+{
+    /* Find the run of entry map records for id. It must exist. */
+    EntryMapEl *enLow = 0, *enHigh = 0;
+    entryPoints.findMulti( id, enLow, enHigh );
+    assert( enLow != 0 && enHigh != 0 );
+
+    for ( EntryMapEl *mel = enLow; mel <= enHigh; mel++ ) {
+        /* Remove the state's sense of the link. */
+        mel->value->entryIds.remove( id );
+        mel->value->foreignInTrans -= 1;
+        /* With misfit accounting on, a state left with no foreign in
+         * transitions moves from the main list to the misfit list. */
+        if ( misfitAccounting && mel->value->foreignInTrans == 0 )
+            misfitList.append( stateList.detach( mel->value ) );
+    }
+
+    /* Remove the records from the entry points map. */
+    entryPoints.removeMulti( enLow, enHigh );
+}
+
+
+/* Retarget the entry point record for id from state `from` to state `to`.
+ * The id must be mapped to `from`; this is asserted. */
+void FsmAp::changeEntry( int id, StateAp *to, StateAp *from )
+{
+    /* Find the run of entry map records for id. It must exist. */
+    EntryMapEl *enLow = 0, *enHigh = 0;
+    entryPoints.findMulti( id, enLow, enHigh );
+    assert( enLow != 0 && enHigh != 0 );
+
+    /* Walk the run to from's record without stepping past its end. */
+    while ( enLow->value != from ) {
+        assert( enLow < enHigh );
+        enLow += 1;
+    }
+
+    /* Change it to the new target. */
+    enLow->value = to;
+
+    /* Remove from's sense of the link. With misfit accounting on, a state
+     * left with no foreign in transitions goes to the misfit list. */
+    from->entryIds.remove( id );
+    from->foreignInTrans -= 1;
+    if ( misfitAccounting && from->foreignInTrans == 0 )
+        misfitList.append( stateList.detach( from ) );
+
+    /* Add to's sense of the link. Its count only goes up when it did not
+     * already carry id. */
+    if ( to->entryIds.insert( id ) != 0 ) {
+        /* About to gain its first foreign way in: back to the main list. */
+        if ( misfitAccounting && to->foreignInTrans == 0 )
+            stateList.append( misfitList.detach( to ) );
+        to->foreignInTrans += 1;
+    }
+}
+
+
+/* Clear all entry points from a machine: every state gives up its entry ids
+ * and the foreign in transition counts that came with them. */
+void FsmAp::unsetAllEntryPoints()
+{
+    for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) {
+        /* A state's entry points are all killed at once, so a state whose
+         * id set is already empty has nothing left to give up. */
+        StateAp *target = en->value;
+        if ( target->entryIds.length() <= 0 )
+            continue;
+
+        target->foreignInTrans -= target->entryIds.length();
+        /* With misfit accounting on, a state left with no foreign in
+         * transitions moves from the main list to the misfit list. */
+        if ( misfitAccounting && target->foreignInTrans == 0 )
+            misfitList.append( stateList.detach( target ) );
+
+        /* Clear the set of ids out all at once. */
+        target->entryIds.empty();
+    }
+
+    /* Now clear out the entry map all at once. */
+    entryPoints.empty();
+}
+
+/* Record an epsilon transition on entry id in every final state. */
+void FsmAp::epsilonTrans( int id )
+{
+    for ( int s = 0; s < finStateSet.length(); s++ )
+        finStateSet.data[s]->epsilonTrans.append( id );
+}
+
+/* Forward reachability marking. Sets SB_ISMARKED on state and, recursively, on
+ * every state reachable from it along out transitions. Used for removing
+ * states that have no path into them. */
+void FsmAp::markReachableFromHere( StateAp *state )
+{
+    /* Only unmarked states need work; a set mark ends the recursion. */
+    if ( (state->stateBits & SB_ISMARKED) == 0 ) {
+        /* Mark before descending so that cycles terminate. */
+        state->stateBits |= SB_ISMARKED;
+
+        /* Follow every out transition that has a target. */
+        TransList::Iter trans = state->outList;
+        while ( trans.lte() ) {
+            if ( trans->toState != 0 )
+                markReachableFromHere( trans->toState );
+            trans++;
+        }
+    }
+}
+
+/* Like markReachableFromHere, except the walk does not descend into final
+ * states: a final target is neither marked nor followed by this call. */
+void FsmAp::markReachableFromHereStopFinal( StateAp *state )
+{
+    /* Only unmarked states need work; a set mark ends the recursion. */
+    if ( (state->stateBits & SB_ISMARKED) == 0 ) {
+        /* Mark before descending so that cycles terminate. */
+        state->stateBits |= SB_ISMARKED;
+
+        /* Follow out transitions, skipping missing and final targets. */
+        TransList::Iter trans = state->outList;
+        while ( trans.lte() ) {
+            StateAp *next = trans->toState;
+            if ( next != 0 && !next->isFinState() )
+                markReachableFromHereStopFinal( next );
+            trans++;
+        }
+    }
+}
+
+/* Backward reachability marking. Sets SB_ISMARKED on state and, recursively,
+ * on every state that has a path into it, by walking in transitions back to
+ * their source states. Used for removing dead end paths in graphs. */
+void FsmAp::markReachableFromHereReverse( StateAp *state )
+{
+    /* Only unmarked states need work; a set mark ends the recursion. */
+    if ( (state->stateBits & SB_ISMARKED) == 0 ) {
+        /* Mark before ascending so that cycles terminate. */
+        state->stateBits |= SB_ISMARKED;
+
+        /* Visit the source of every transition coming into this state. */
+        TransInList::Iter trans = state->inList;
+        while ( trans.lte() ) {
+            markReachableFromHereReverse( trans->fromState );
+            trans++;
+        }
+    }
+}
+
+/* Report whether the start state can only be entered by virtue of being the
+ * start state. Setting starting transitions requires that the start state be
+ * isolated. In most cases a start state will already be isolated. */
+bool FsmAp::isStartStateIsolated()
+{
+    /* Any transition coming in means the state is not isolated. */
+    bool hasInTrans = startState->inList.head != 0;
+    if ( hasInTrans )
+        return false;
+
+    /* Any entry point leading to it means the state is not isolated. */
+    bool hasEntryIds = startState->entryIds.length() > 0;
+    return !hasEntryIds;
+}
+
+/* Copy every record of other's entry map into this machine's entry map.
+ * Assumes other's states are going to be moved into this machine. */
+void FsmAp::copyInEntryPoints( FsmAp *other )
+{
+    /* Keys are not unique, so each record is added with insertMulti. */
+    EntryMap::Iter en = other->entryPoints;
+    while ( en.lte() ) {
+        entryPoints.insertMulti( en->key, en->value );
+        en++;
+    }
+}
+
+/* Number the states on the main state list consecutively from zero, in list
+ * order, storing the number in each state's alg.stateNum. */
+void FsmAp::setStateNumbers()
+{
+    int nextNum = 0;
+    for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+        state->alg.stateNum = nextNum;
+        nextNum += 1;
+    }
+}
+
+
+/* Strip final status from every state: clear SB_ISFINAL on each member of the
+ * final state set, then empty the set. */
+void FsmAp::unsetAllFinStates()
+{
+    for ( int s = 0; s < finStateSet.length(); s++ )
+        finStateSet.data[s]->stateBits &= ~SB_ISFINAL;
+
+    finStateSet.empty();
+}
+
+/* Or the given bits into the stateBits of every final state. */
+void FsmAp::setFinBits( int finStateBits )
+{
+    for ( StateSet::Iter st = finStateSet; st.lte(); st++ )
+        (*st)->stateBits |= finStateBits;
+}
+
+
+/* Consistency check of the transition lists: every transition on a state's
+ * out list must name that state as its source and every transition on its in
+ * list must name it as its target. Violations trip an assertion. */
+void FsmAp::verifyIntegrity()
+{
+    StateList::Iter state = stateList;
+    while ( state.lte() ) {
+        /* Out transitions must originate here. */
+        TransList::Iter trans = state->outList;
+        while ( trans.lte() ) {
+            assert( trans->fromState == state );
+            trans++;
+        }
+
+        /* In transitions must arrive here. */
+        TransInList::Iter inTrans = state->inList;
+        while ( inTrans.lte() ) {
+            assert( inTrans->toState == state );
+            inTrans++;
+        }
+
+        state++;
+    }
+}
+
+/* Assert that every state on the main list can be reached from the start
+ * state or from one of the entry points. The marks set during the walk are
+ * cleared again as the list is checked. */
+void FsmAp::verifyReachability()
+{
+    /* Mark forward from the start state, then from each entry point. */
+    markReachableFromHere( startState );
+    EntryMap::Iter en = entryPoints;
+    while ( en.lte() ) {
+        markReachableFromHere( en->value );
+        en++;
+    }
+
+    /* Every state must now carry the mark; remove it after checking. */
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        assert( st->stateBits & SB_ISMARKED );
+        st->stateBits &= ~SB_ISMARKED;
+        st++;
+    }
+}
+
+/* Assert that every state on the main list has a path to some final state.
+ * The start state is exempt. The marks set during the walk are cleared again
+ * as the list is checked. */
+void FsmAp::verifyNoDeadEndStates()
+{
+    /* Mark backwards from each final state. */
+    StateSet::Iter fin = finStateSet;
+    while ( fin.lte() ) {
+        markReachableFromHereReverse( *fin );
+        fin++;
+    }
+
+    /* The start state is marked unconditionally. This must come after the
+     * recursive marking above. */
+    startState->stateBits |= SB_ISMARKED;
+
+    /* Every state must now carry the mark; remove it after checking. */
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        assert( st->stateBits & SB_ISMARKED );
+        st->stateBits &= ~SB_ISMARKED;
+        st++;
+    }
+}
diff --git a/ragel/fsmgraph.cpp b/ragel/fsmgraph.cpp
new file mode 100644
index 0000000..41c4b44
--- /dev/null
+++ b/ragel/fsmgraph.cpp
@@ -0,0 +1,1399 @@
+/*
+ * Copyright 2001, 2002, 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <assert.h>
+#include <iostream>
+
+#include "fsmgraph.h"
+#include "mergesort.h"
+#include "parsedata.h"
+
+using std::cerr;
+using std::endl;
+
+/* Allocate a new state, put it on one of the graph's state lists and return
+ * it. Final status is not set here. */
+StateAp *FsmAp::addState()
+{
+    StateAp *created = new StateAp();
+
+    if ( !misfitAccounting ) {
+        /* Without misfit accounting all states live on the main list. */
+        stateList.append( created );
+    }
+    else {
+        /* A new state has no foreign in transitions yet, so under misfit
+         * accounting it starts out on the misfit list. */
+        misfitList.append( created );
+    }
+
+    return created;
+}
+
+/* Build a machine matching the exact key sequence str[0..len-1]. The machine
+ * gets len+1 states chained by one single-key transition per element of str;
+ * the first state is the start state and the last one is final. */
+void FsmAp::concatFsm( Key *str, int len )
+{
+    /* The chain begins at the start state. */
+    StateAp *tail = addState();
+    setStartState( tail );
+
+    /* Extend the chain by one state per key. */
+    int pos = 0;
+    while ( pos < len ) {
+        StateAp *next = addState();
+        attachNewTrans( tail, next, str[pos], str[pos] );
+        tail = next;
+        pos += 1;
+    }
+
+    /* Whatever state ends the chain is the final state. */
+    setFinState( tail );
+}
+
+/* Case insensitive version of concatFsm. Builds the same chain of len+1
+ * states, but between each pair of neighbouring states there is a transition
+ * for the key itself and, when the key is a lower or upper case letter, for
+ * its other-case counterpart as well. */
+void FsmAp::concatFsmCI( Key *str, int len )
+{
+    /* Make the first state and set it as the start state. */
+    StateAp *last = addState();
+    setStartState( last );
+
+    /* Attach subsequent states. */
+    for ( int i = 0; i < len; i++ ) {
+        StateAp *newState = addState();
+
+        /* Collect the key and its case variant; the set holds each key
+         * only once. */
+        KeySet keySet;
+        if ( str[i].isLower() )
+            keySet.insert( str[i].toUpper() );
+        if ( str[i].isUpper() )
+            keySet.insert( str[i].toLower() );
+        keySet.insert( str[i] );
+
+        /* One transition per collected key. The index has its own name so
+         * that it does not shadow the string position i. */
+        for ( int k = 0; k < keySet.length(); k++ )
+            attachNewTrans( last, newState, keySet[k], keySet[k] );
+
+        last = newState;
+    }
+
+    /* Make the last state the final state. */
+    setFinState( last );
+}
+
+/* Build a machine matching the single key chr: a start state, a final state
+ * and one transition on chr from the first to the second. */
+void FsmAp::concatFsm( Key chr )
+{
+    /* First state created is the start state. */
+    setStartState( addState() );
+
+    /* Second state created is the final state. */
+    StateAp *finish = addState();
+    setFinState( finish );
+
+    /* Join them on the key. */
+    attachNewTrans( startState, finish, chr, chr );
+}
+
+/* Build a machine matching any one key of set: a start state, a final state
+ * and len single-key transitions between them. The keys must be strictly
+ * increasing (ordered according to KeyOps and free of duplicates); this is
+ * asserted. */
+void FsmAp::orFsm( Key *set, int len )
+{
+    /* First state created is the start state. */
+    setStartState( addState() );
+
+    /* Second state created is the final state. */
+    StateAp *finish = addState();
+    setFinState( finish );
+
+    /* Check the ordering requirement on neighbouring keys. */
+    int chk = 1;
+    while ( chk < len ) {
+        assert( set[chk-1] < set[chk] );
+        chk += 1;
+    }
+
+    /* One transition per key in the set. */
+    int pos = 0;
+    while ( pos < len ) {
+        attachNewTrans( startState, finish, set[pos], set[pos] );
+        pos += 1;
+    }
+}
+
+/* Build a machine matching any single key in the inclusive range low..high: a
+ * start state, a final state and one range transition between them. Low
+ * should be less than or equal to high, otherwise undefined behaviour
+ * results. */
+void FsmAp::rangeFsm( Key low, Key high )
+{
+    /* First state created is the start state. */
+    setStartState( addState() );
+
+    /* Second state created is the final state. */
+    StateAp *finish = addState();
+    setFinState( finish );
+
+    /* Join them on the whole range. */
+    attachNewTrans( startState, finish, low, high );
+}
+
+/* Build a machine that matches a repeated range of characters: a single state
+ * that is both start and final, with a range transition low..high looping
+ * back onto itself. */
+void FsmAp::rangeStarFsm( Key low, Key high)
+{
+    /* The only state is the start state and is also final. */
+    setStartState( addState() );
+    setFinState( startState );
+
+    /* Loop on the range. */
+    attachNewTrans( startState, startState, low, high );
+}
+
+/* Build a machine that matches the empty string: one state without
+ * transitions that is both the start state and a final state. */
+void FsmAp::lambdaFsm( )
+{
+    /* Create the lone state as start state, then make it final too. */
+    setStartState( addState() );
+    setFinState( startState );
+}
+
+/* Build a machine that matches nothing at all: one state without transitions
+ * that is the start state and is not final. */
+void FsmAp::emptyFsm( )
+{
+    /* Create the lone state as start state; no final state is set. */
+    setStartState( addState() );
+}
+
+/* Apply srcState's out action table and out priority table to every out
+ * transition of destState that has a target state. */
+void FsmAp::transferOutData( StateAp *destState, StateAp *srcState )
+{
+    TransList::Iter trans = destState->outList;
+    while ( trans.lte() ) {
+        /* Transitions without a target are left untouched. */
+        if ( trans->toState != 0 ) {
+            /* Actions first, from the out action table. */
+            trans->actionTable.setActions( srcState->outActionTable );
+
+            /* Then priorities, from the out priority table. */
+            trans->priorTable.setPriors( srcState->outPriorTable );
+        }
+        trans++;
+    }
+}
+
+/* Kleene star operator. Makes this machine the kleene star of itself. Any
+ * transitions made going out of the machine and back into itself will be
+ * notified that they are leaving transitions by having the leavingFromState
+ * callback invoked.
+ *
+ * Steps: a fresh start state is created and the old start state merged into
+ * it, the new start state is merged (as a leaving merge) into every final
+ * state, the new start state is made final, states created by the merging are
+ * filled in and misfits are removed. */
+void FsmAp::starOp( )
+{
+ /* For the merging process. */
+ MergeData md;
+
+ /* Turn on misfit accounting to possibly catch the old start state. */
+ setMisfitAccounting( true );
+
+ /* Create the new start state. It will be set final after the merging
+ * of the final states with the start state is complete. */
+ StateAp *prevStartState = startState;
+ unsetStartState();
+ setStartState( addState() );
+
+ /* Merge the new start state with the old one to isolate it. */
+ mergeStates( md, startState, prevStartState );
+
+ /* Merge the start state into all final states. Except the start state on
+ * the first pass. If the start state is set final we will be doubling up
+ * its transitions, which will get transferred to any final states that
+ * follow it in the final state set. This will be determined by the order
+ * of items in the final state set. To prevent this we just merge with the
+ * start on a second pass. */
+ for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) {
+ if ( *st != startState )
+ mergeStatesLeaving( md, *st, startState );
+ }
+
+ /* Now it is safe to merge the start state with itself (provided it
+ * is set final). */
+ if ( startState->isFinState() )
+ mergeStatesLeaving( md, startState, startState );
+
+ /* Now ensure the new start state is a final state. */
+ setFinState( startState );
+
+ /* Fill in any states that were newed up as combinations of others. */
+ fillInStates( md );
+
+ /* Remove the misfits and turn off misfit accounting. */
+ removeMisfits();
+ setMisfitAccounting( false );
+}
+
+/* Turn this machine into exactly times back-to-back repetitions of itself by
+ * concatenating copies of the original machine onto the end. times must be at
+ * least 1; zero would produce the null machine and require deleting this. */
+void FsmAp::repeatOp( int times )
+{
+    assert( times > 0 );
+
+    /* A single repetition is the machine as it already stands. */
+    if ( times != 1 ) {
+        /* Pristine copy of the machine, taken before anything is appended. */
+        FsmAp *copyFrom = new FsmAp( *this );
+
+        /* Repetitions two up to times-1 are duplicates of the pristine
+         * copy. */
+        for ( int rep = 2; rep < times; rep++ ) {
+            FsmAp *dup = new FsmAp( *copyFrom );
+            doConcat( dup, 0, false );
+        }
+
+        /* The pristine copy itself serves as the last repetition. */
+        doConcat( copyFrom, 0, false );
+    }
+}
+
+/* Turn this machine into one that accepts anywhere from zero up to times
+ * repetitions of itself. Copies of the original machine are appended with
+ * optional-style concatenation, which leaves the final states of the earlier
+ * repetitions final. times must be at least 1; zero would produce the null
+ * machine and require deleting this. */
+void FsmAp::optionalRepeatOp( int times )
+{
+    /* Must be 1 and up. 0 produces null machine and requires deleting this. */
+    assert( times > 0 );
+
+    /* A repeat of one optional merely allows zero string. */
+    if ( times == 1 ) {
+        setFinState( startState );
+        return;
+    }
+
+    /* Make a machine to make copies from. */
+    FsmAp *copyFrom = new FsmAp( *this );
+
+    /* The state set used in the from end of the concatenation. Starts with
+     * the initial final state set, then after each concatenation, gets set to
+     * the final states that come from the duplicate. */
+    StateSet lastFinSet( finStateSet );
+
+    /* Set the start state final to allow zero copies. */
+    setFinState( startState );
+
+    /* Concatenate duplicates onto the end up until before the last. */
+    for ( int i = 1; i < times-1; i++ ) {
+        /* Make a duplicate for concatenating and set the fin bits to graph 2
+         * so we can pick out its final states after the optional style
+         * concat. */
+        FsmAp *dup = new FsmAp( *copyFrom );
+        dup->setFinBits( SB_GRAPH2 );
+        doConcat( dup, &lastFinSet, true );
+
+        /* Clear the last final state set and make the new one by taking only
+         * the final states that come from graph 2. The index has its own
+         * name so that it does not shadow the repetition counter i. */
+        lastFinSet.empty();
+        for ( int f = 0; f < finStateSet.length(); f++ ) {
+            /* If the state came from graph 2, add it to the last set and
+             * clear the bits. */
+            StateAp *fs = finStateSet[f];
+            if ( fs->stateBits & SB_GRAPH2 ) {
+                lastFinSet.insert( fs );
+                fs->stateBits &= ~SB_GRAPH2;
+            }
+        }
+    }
+
+    /* Now use the copyFrom on the end, no bits set, no bits to clear. */
+    doConcat( copyFrom, &lastFinSet, true );
+}
+
+
+/* Fsm concatenation worker. Moves other's states, entry points and final
+ * states into this machine, deletes other, and merges other's start state
+ * into each of the from states.
+ *
+ * other:      machine to append; deleted by this call.
+ * fromStates: states that other's start state is merged into. When 0, a copy
+ *             of this machine's final state set (taken before it is changed)
+ *             is used.
+ * optional:   when true the existing final states of this machine stay
+ *             final; when false they are all unset first. */
+void FsmAp::doConcat( FsmAp *other, StateSet *fromStates, bool optional )
+{
+    /* For the merging process. */
+    StateSet finStateSetCopy;
+    MergeData md;
+
+    /* Turn on misfit accounting for both graphs. */
+    setMisfitAccounting( true );
+    other->setMisfitAccounting( true );
+
+    /* Get the other's start state. */
+    StateAp *otherStartState = other->startState;
+
+    /* Unset other's start state before bringing in the entry points. */
+    other->unsetStartState();
+
+    /* Bring in the rest of other's entry points. */
+    copyInEntryPoints( other );
+    other->entryPoints.empty();
+
+    /* Bring in other's states into our state lists. */
+    stateList.append( other->stateList );
+    misfitList.append( other->misfitList );
+
+    /* If from states is not set, then get a copy of our final state set before
+     * we clobber it and use it instead. */
+    if ( fromStates == 0 ) {
+        finStateSetCopy = finStateSet;
+        fromStates = &finStateSetCopy;
+    }
+
+    /* Unset all of our final states and get the final states from other. */
+    if ( !optional )
+        unsetAllFinStates();
+    finStateSet.insert( other->finStateSet );
+
+    /* Since other's lists are empty, we can delete the fsm without
+     * affecting any states. */
+    delete other;
+
+    /* Merge our former final states with the start state of other. */
+    for ( int i = 0; i < fromStates->length(); i++ ) {
+        StateAp *state = fromStates->data[i];
+
+        /* Merge the former final state with other's start state. */
+        mergeStatesLeaving( md, state, otherStartState );
+
+        /* If the former final state was not reset final then we must clear
+         * the state's out trans data. If it got reset final then it gets to
+         * keep its out trans data. This must be done before fillInStates gets
+         * called to prevent the data from being sourced. */
+        if ( ! state->isFinState() )
+            clearOutData( state );
+    }
+
+    /* Fill in any new states made from merging. */
+    fillInStates( md );
+
+    /* Remove the misfits and turn off misfit accounting. */
+    removeMisfits();
+    setMisfitAccounting( false );
+}
+
+/* Concatenates other to the end of this machine. Other is deleted. Any
+ * transitions made leaving this machine and entering into other are notified
+ * that they are leaving transitions by having the leavingFromState callback
+ * invoked. */
+void FsmAp::concatOp( FsmAp *other )
+{
+    /* Plain concatenation: no explicit from states (the worker then uses
+     * this machine's own final states) and not optional. */
+    StateSet *useOwnFinals = 0;
+    doConcat( other, useOwnFinals, false );
+}
+
+
+/* Worker for or-style combination of two machines. Moves other's entry
+ * points, states and final states into this machine, deletes other, then
+ * creates a new start state and merges both original start states into it.
+ * Misfit accounting is not switched on or off here. */
+void FsmAp::doOr( FsmAp *other )
+{
+ /* For the merging process. */
+ MergeData md;
+
+ /* Build a state set consisting of both start states */
+ StateSet startStateSet;
+ startStateSet.insert( startState );
+ startStateSet.insert( other->startState );
+
+ /* Both of the original start states lose their start state status. */
+ unsetStartState();
+ other->unsetStartState();
+
+ /* Bring in the rest of other's entry points. */
+ copyInEntryPoints( other );
+ other->entryPoints.empty();
+
+ /* Merge the lists. This will move all the states from other
+ * into this. No states will be deleted. */
+ stateList.append( other->stateList );
+ misfitList.append( other->misfitList );
+
+ /* Move the final set data from other into this. */
+ finStateSet.insert(other->finStateSet);
+ other->finStateSet.empty();
+
+ /* Since other's list is empty, we can delete the fsm without
+ * affecting any states. */
+ delete other;
+
+ /* Create a new start state. */
+ setStartState( addState() );
+
+ /* Merge the start states. */
+ mergeStates( md, startState, startStateSet.data, startStateSet.length() );
+
+ /* Fill in any new states made from merging. */
+ fillInStates( md );
+}
+
+/* Make this machine the union of itself and other. Other is deleted. */
+void FsmAp::unionOp( FsmAp *other )
+{
+    /* Both graphs take part in misfit accounting while they are combined. */
+    setMisfitAccounting( true );
+    other->setMisfitAccounting( true );
+
+    /* The or worker does the combining. */
+    doOr( other );
+
+    /* Clean out the misfits, then switch the accounting back off. */
+    removeMisfits();
+    setMisfitAccounting( false );
+}
+
+/* Make this machine the intersection of itself and other. Other is deleted. */
+void FsmAp::intersectOp( FsmAp *other )
+{
+    /* Both graphs take part in misfit accounting while they are combined. */
+    setMisfitAccounting( true );
+    other->setMisfitAccounting( true );
+
+    /* Tag the final states by origin: SB_GRAPH1 for this machine,
+     * SB_GRAPH2 for other, so each side wants the other. */
+    setFinBits( SB_GRAPH1 );
+    other->setFinBits( SB_GRAPH2 );
+
+    /* The or worker does the combining. */
+    doOr( other );
+
+    /* Final states that are no longer to be final, judging by the final
+     * bits, lose their final status. */
+    unsetIncompleteFinals();
+
+    /* Clean out the misfits, then switch the accounting back off. */
+    removeMisfits();
+    setMisfitAccounting( false );
+
+    /* Drop states from which no final state can be reached. */
+    removeDeadEndStates();
+}
+
+/* Set-subtract other from this machine. Other is deleted. */
+void FsmAp::subtractOp( FsmAp *other )
+{
+    /* Both graphs take part in misfit accounting while they are combined. */
+    setMisfitAccounting( true );
+    other->setMisfitAccounting( true );
+
+    /* Tag other's final states with SB_GRAPH1 so that they act as killers. */
+    other->setFinBits( SB_GRAPH1 );
+
+    /* The or worker does the combining. */
+    doOr( other );
+
+    /* Final states carrying the killer bit lose their final status. */
+    unsetKilledFinals();
+
+    /* Clean out the misfits, then switch the accounting back off. */
+    removeMisfits();
+    setMisfitAccounting( false );
+
+    /* Drop states from which no final state can be reached. */
+    removeDeadEndStates();
+}
+
+/* Report whether state appears as a target in eptVect. A null vector contains
+ * nothing. */
+bool FsmAp::inEptVect( EptVect *eptVect, StateAp *state )
+{
+    /* No vector, no members. */
+    if ( eptVect == 0 )
+        return false;
+
+    /* Linear search over the recorded targets. */
+    int pos = 0;
+    while ( pos < eptVect->length() ) {
+        if ( eptVect->data[pos].targ == state )
+            return true;
+        pos += 1;
+    }
+    return false;
+}
+
+/* Fill epsilon vectors in a root state from a given starting point. Employs
+ * a depth first search through the graph of epsilon transitions.
+ *
+ * root:          state whose eptVect collects the targets.
+ * from:          state whose epsilon transitions are expanded on this call.
+ * parentLeaving: true when an epsilon already followed on the way here was a
+ *                leaving one; every target found from here on is then
+ *                recorded as leaving too. */
+void FsmAp::epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving )
+{
+ /* Walk the epsilon transitions out of the state. */
+ for ( EpsilonTrans::Iter ep = from->epsilonTrans; ep.lte(); ep++ ) {
+ /* Find the entry point, if it does not resolve, ignore it. */
+ EntryMapEl *enLow, *enHigh;
+ if ( entryPoints.findMulti( *ep, enLow, enHigh ) ) {
+ /* Loop the targets. */
+ for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) {
+ /* Skip a target equal to the state being expanded and targets
+ * already in root's eptVect. NOTE(review): the test is against
+ * from, not root; confirm that is the intent. */
+ StateAp *targ = en->value;
+ if ( targ != from && !inEptVect(root->eptVect, targ) ) {
+ /* Maybe need to create the eptVect. */
+ if ( root->eptVect == 0 )
+ root->eptVect = new EptVect();
+
+ /* If moving to a different graph or if any parent is
+ * leaving then we are leaving. */
+ bool leaving = parentLeaving ||
+ root->owningGraph != targ->owningGraph;
+
+ /* All ok, add the target epsilon and recurse. */
+ root->eptVect->append( EptVectEl(targ, leaving) );
+ epsilonFillEptVectFrom( root, targ, leaving );
+ }
+ }
+ }
+ }
+}
+
+/* For every epsilon target that also has an eptVect of its own (so it is both
+ * read from and written to), create a shadow state, merge the target into it
+ * and redirect the eptVect entry to the shadow. One shadow is made per target
+ * and remembered in the target's isolatedShadow. */
+void FsmAp::shadowReadWriteStates( MergeData &md )
+{
+ /* Init isolatedShadow algorithm data. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ )
+ st->isolatedShadow = 0;
+
+ /* Any states that may be both read from and written to must
+ * be shadowed. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+ /* Find such states by looping through the eptVect lists, which give us
+ * the states that will be read from. May cause us to visit the states
+ * that we are interested in more than once. */
+ if ( st->eptVect != 0 ) {
+ /* For all states that will be read from. */
+ for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) {
+ /* Check for read and write to the same state. */
+ StateAp *targ = ept->targ;
+ if ( targ->eptVect != 0 ) {
+ /* State is to be written to, if the shadow is not already
+ * there, create it. */
+ if ( targ->isolatedShadow == 0 ) {
+ StateAp *shadow = addState();
+ mergeStates( md, shadow, targ );
+ targ->isolatedShadow = shadow;
+ }
+
+ /* Write shadow into the state vector so that it is the
+ * state that the epsilon transition will read from. */
+ ept->targ = targ->isolatedShadow;
+ }
+ }
+ }
+ }
+}
+
+/* Resolve the epsilon transitions of all states on the main list: collect
+ * each state's epsilon targets into its eptVect, shadow the states that are
+ * both read and written, merge the targets into each state, then free the
+ * eptVect and clear the state's epsilon transition list. */
+void FsmAp::resolveEpsilonTrans( MergeData &md )
+{
+ /* Walk the state list and invoke recursive worker on each state. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ )
+ epsilonFillEptVectFrom( st, st, false );
+
+ /* Prevent reading from and writing to the same state. */
+ shadowReadWriteStates( md );
+
+ /* For all states that have epsilon transitions out, draw the transitions,
+ * clear the epsilon transitions. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+ /* If there is a state vector, then create the pre-merge state. */
+ if ( st->eptVect != 0 ) {
+ /* Merge all the epsilon targets into the state. Targets flagged as
+ * leaving use the leaving variant of the merge. */
+ for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) {
+ if ( ept->leaving )
+ mergeStatesLeaving( md, st, ept->targ );
+ else
+ mergeStates( md, st, ept->targ );
+ }
+
+ /* Clean up the target list. */
+ delete st->eptVect;
+ st->eptVect = 0;
+ }
+
+ /* Clear the epsilon transitions vector. */
+ st->epsilonTrans.empty();
+ }
+}
+
+/* Resolve the epsilon transitions within this single machine. All states are
+ * put in owning graph zero before the transitions are resolved. */
+void FsmAp::epsilonOp()
+{
+    /* Scratch data for the merging process. */
+    MergeData md;
+
+    setMisfitAccounting( true );
+
+    /* Every state belongs to the same owning graph. */
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        st->owningGraph = 0;
+        st++;
+    }
+
+    /* Merge states with the targets of their epsilon transitions. */
+    resolveEpsilonTrans( md );
+
+    /* Epsilons can cause merges which leave behind unreachable states. */
+    fillInStates( md );
+
+    /* Clean out the misfits, then switch the accounting back off. */
+    removeMisfits();
+    setMisfitAccounting( false );
+}
+
+/* Make a new machine by joining together a bunch of machines without making
+ * any transitions between them. A negative finalId results in there being no
+ * final id.
+ *
+ * startId:   entry id whose targets are merged into the new start state.
+ * finalId:   entry id set on the implicit final state, when non-negative.
+ * others:    the machines joined into this one; each is deleted.
+ * numOthers: number of machines in others. */
+void FsmAp::joinOp( int startId, int finalId, FsmAp **others, int numOthers )
+{
+ /* For the merging process. */
+ MergeData md;
+
+ /* Set the owning machines. Start at one. Zero is reserved for the start
+ * and final states. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ )
+ st->owningGraph = 1;
+ for ( int m = 0; m < numOthers; m++ ) {
+ for ( StateList::Iter st = others[m]->stateList; st.lte(); st++ )
+ st->owningGraph = 2+m;
+ }
+
+ /* All machines lose start state status. */
+ unsetStartState();
+ for ( int m = 0; m < numOthers; m++ )
+ others[m]->unsetStartState();
+
+ /* Bring the other machines into this. */
+ for ( int m = 0; m < numOthers; m++ ) {
+ /* Bring in the rest of other's entry points. */
+ copyInEntryPoints( others[m] );
+ others[m]->entryPoints.empty();
+
+ /* Merge the lists. This will move all the states from other into
+ * this. No states will be deleted. */
+ stateList.append( others[m]->stateList );
+ assert( others[m]->misfitList.length() == 0 );
+
+ /* Move the final set data from other into this. */
+ finStateSet.insert( others[m]->finStateSet );
+ others[m]->finStateSet.empty();
+
+ /* Since other's list is empty, we can delete the fsm without
+ * affecting any states. */
+ delete others[m];
+ }
+
+ /* Look up the start entry point. */
+ EntryMapEl *enLow = 0, *enHigh = 0;
+ bool findRes = entryPoints.findMulti( startId, enLow, enHigh );
+ if ( ! findRes ) {
+ /* No start state. Set a default one and proceed with the join. Note
+ * that the result of the join will be a very uninteresting machine. */
+ setStartState( addState() );
+ }
+ else {
+ /* There is at least one start state, create a state that will become
+ * the new start state. */
+ StateAp *newStart = addState();
+ setStartState( newStart );
+
+ /* The start state is in an owning machine class all its own. */
+ newStart->owningGraph = 0;
+
+ /* Create the set of states to merge from. */
+ StateSet stateSet;
+ for ( EntryMapEl *en = enLow; en <= enHigh; en++ )
+ stateSet.insert( en->value );
+
+ /* Merge in the set of start states into the new start state. */
+ mergeStates( md, newStart, stateSet.data, stateSet.length() );
+ }
+
+ /* Take a copy of the final state set, before unsetting them all. This
+ * will allow us to call clearOutData on the states that don't get
+ * final state status back. */
+ StateSet finStateSetCopy = finStateSet;
+
+ /* Now all final states are unset. */
+ unsetAllFinStates();
+
+ if ( finalId >= 0 ) {
+ /* Create the implicit final state. */
+ StateAp *finState = addState();
+ setFinState( finState );
+
+ /* Assign an entry into the final state on the final state entry id. Note
+ * that there may already be an entry on this id. That's ok. Also set the
+ * final state owning machine id. It's in a class all its own. */
+ setEntry( finalId, finState );
+ finState->owningGraph = 0;
+ }
+
+ /* Hand over to workers for resolving epsilon trans. This will merge states
+ * with the targets of their epsilon transitions. */
+ resolveEpsilonTrans( md );
+
+ /* Clear the out data of any states that did not get final state status
+ * back. */
+ for ( StateSet::Iter st = finStateSetCopy; st.lte(); st++ ) {
+ if ( !((*st)->stateBits & SB_ISFINAL) )
+ clearOutData( *st );
+ }
+
+ /* Fill in any new states made from merging. */
+ fillInStates( md );
+
+ /* Joining can be messy. Instead of having misfit accounting on (which is
+ * tricky here) do a full cleaning. */
+ removeUnreachableStates();
+}
+
+/* Absorb the states, entry points and final states of the given machines into
+ * this machine without drawing any transitions between them. Each of the
+ * other machines loses its start state and is deleted. */
+void FsmAp::globOp( FsmAp **others, int numOthers )
+{
+    /* First pass: every other machine gives up its start state. */
+    for ( int idx = 0; idx < numOthers; idx++ )
+        others[idx]->unsetStartState();
+
+    /* Second pass: pull each machine's contents in and dispose of it. */
+    for ( int idx = 0; idx < numOthers; idx++ ) {
+        FsmAp *other = others[idx];
+
+        /* Entry points come across by copy; the source map is cleared. */
+        copyInEntryPoints( other );
+        other->entryPoints.empty();
+
+        /* The states move onto our main list. Nothing is deleted. The
+         * other machine is required to have no misfits. */
+        stateList.append( other->stateList );
+        assert( other->misfitList.length() == 0 );
+
+        /* The final states move into our final state set. */
+        finStateSet.insert( other->finStateSet );
+        other->finStateSet.empty();
+
+        /* With its lists emptied the machine can be deleted without
+         * affecting any states. */
+        delete other;
+    }
+}
+
+/* Make every entry id lead to exactly one state. Ids that had a single target
+ * keep it; for ids with several targets a new state is created, all of the
+ * targets are merged into it and it becomes the id's only entry. */
+void FsmAp::deterministicEntry()
+{
+    /* Scratch data for the merging process. */
+    MergeData md;
+
+    /* States may lose their entry points, so misfits are tracked. */
+    setMisfitAccounting( true );
+
+    /* Work from a snapshot of the entry map. The live map is cleared and
+     * rebuilt as the snapshot is walked. */
+    EntryMap prevEntry = entryPoints;
+    unsetAllEntryPoints();
+
+    int enId = 0;
+    while ( enId < prevEntry.length() ) {
+        /* Find the end of the run of records that share this key. */
+        int highId = enId;
+        while ( highId < prevEntry.length() &&
+                prevEntry[enId].key == prevEntry[highId].key )
+            highId += 1;
+
+        if ( highId - enId != 1 ) {
+            /* Several targets: merge them all into one new state and make
+             * that state the single entry point. */
+            StateAp *newEntry = addState();
+            for ( int en = enId; en < highId; en++ )
+                mergeStates( md, newEntry, prevEntry[en].value );
+
+            setEntry( prevEntry[enId].key, newEntry );
+        }
+        else {
+            /* A lone target is simply set again. */
+            setEntry( prevEntry[enId].key, prevEntry[enId].value );
+        }
+
+        /* Continue with the next key. */
+        enId = highId;
+    }
+
+    /* The old start state may be unreachable. Clean out the misfits, then
+     * switch the accounting back off. */
+    removeMisfits();
+    setMisfitAccounting( false );
+}
+
+/* Take final status away from every final state that carries the killer bit
+ * SB_GRAPH1, and clear that bit on all states that were final on entry. */
+void FsmAp::unsetKilledFinals()
+{
+    /* Iterate a snapshot, since the live set is modified along the way. */
+    StateSet fin( finStateSet );
+
+    for ( StateSet::Iter st = fin; st.lte(); st++ ) {
+        StateAp *state = *st;
+
+        /* A killer bit turns the state non-final. */
+        bool killed = state->stateBits & SB_GRAPH1;
+        if ( killed )
+            unsetFinState( state );
+
+        /* The bit is removed either way. Non final states should never
+         * have had it set in the first place. */
+        state->stateBits &= ~SB_GRAPH1;
+    }
+}
+
+/* Take final status away from every final state that carries some but not
+ * all of the SB_BOTH bits, and clear those bits on all states that were final
+ * on entry. */
+void FsmAp::unsetIncompleteFinals()
+{
+    /* Iterate a snapshot, since the live set is modified along the way. */
+    StateSet fin( finStateSet );
+
+    for ( StateSet::Iter st = fin; st.lte(); st++ ) {
+        StateAp *state = *st;
+
+        /* Some wanting bit is set, yet not the full pair: one side wants
+         * the other but it is not there. */
+        bool anySet = state->stateBits & SB_BOTH;
+        bool allSet = (state->stateBits & SB_BOTH) == SB_BOTH;
+        if ( anySet && !allSet )
+            unsetFinState( state );
+
+        /* The bits are removed either way. Non final states should never
+         * have had them set in the first place. */
+        state->stateBits &= ~SB_BOTH;
+    }
+}
+
+/* Make sure the start state has no entry points other than being the start
+ * state. If it does, a fresh start state is created and the old one is merged
+ * into it. This is useful before modifying start transitions: when the
+ * existing start state is also entered some other way, changing its
+ * transitions would change more than just the start transitions. After this
+ * call the start state has start stateness as its only entry point. */
+void FsmAp::isolateStartState( )
+{
+    /* Already isolated: nothing to do. */
+    if ( isStartStateIsolated() )
+        return;
+
+    /* Scratch data for the merging process. */
+    MergeData md;
+
+    /* Misfit accounting lets us catch the old start state should it end up
+     * unreachable. */
+    setMisfitAccounting( true );
+
+    /* Swap in a brand new start state, remembering the previous one. */
+    StateAp *oldStart = startState;
+    unsetStartState();
+    StateAp *freshStart = addState();
+    setStartState( freshStart );
+
+    /* Pull the old start state into the new one. */
+    mergeStates( md, startState, oldStart );
+
+    /* Merging into an empty state produces no conflicting transitions, so
+     * neither the fill list nor the state dict may have been touched. */
+    assert( md.stateDict.treeSize == 0 );
+    assert( md.stfillHead == 0 );
+
+    /* Clean up a possibly unreachable old start state and switch misfit
+     * accounting back off. */
+    removeMisfits();
+    setMisfitAccounting( false );
+}
+
+#ifdef LOG_CONDS
+/* Debug helper (LOG_CONDS only): write the action names of a condition
+ * space to cerr, comma separated, or "<empty>" for a null space. */
+void logCondSpace( CondSpace *condSpace )
+{
+    /* No space at all. */
+    if ( condSpace == 0 ) {
+        cerr << "<empty>";
+        return;
+    }
+
+    /* Start at the last element and step backwards; a comma goes in front
+     * of every element except the one the iterator reports as last. */
+    CondSet::Iter cond = condSpace->condSet.last();
+    while ( cond.gtb() ) {
+        if ( ! cond.last() )
+            cerr << ',';
+        (*cond)->actionName( cerr );
+        cond--;
+    }
+}
+
+/* Debug helper (LOG_CONDS only): dump the key range, the from/to condition
+ * spaces and the from/to value lists of an expansion to cerr. */
+void logNewExpansion( Expansion *exp )
+{
+    cerr << "created expansion:" << endl;
+
+    /* Key range covered by the expansion. */
+    cerr << " range: ";
+    cerr << exp->lowKey.getVal() << " .. " << exp->highKey.getVal();
+    cerr << endl;
+
+    /* Where the expansion comes from. */
+    cerr << " fromCondSpace: ";
+    logCondSpace( exp->fromCondSpace );
+    cerr << endl << " fromVals: " << exp->fromVals << endl;
+
+    /* Where the expansion goes to. */
+    cerr << " toCondSpace: ";
+    logCondSpace( exp->toCondSpace );
+    cerr << endl << " toValsList: ";
+
+    LongVect::Iter val = exp->toValsList;
+    while ( val.lte() ) {
+        cerr << " " << *val;
+        val++;
+    }
+    cerr << endl;
+}
+#endif
+
+
+/* Find the out transitions of destState whose key range overlaps a state
+ * condition range of srcState. For each overlap an Expansion is appended to
+ * expansionList. It holds a private copy of the dest transition (no from
+ * state, same target), has no source condition space, and targets the
+ * condition space of the overlapping src state cond with every possible
+ * combination of condition values. */
+void FsmAp::findTransExpansions( ExpansionList &expansionList,
+ StateAp *destState, StateAp *srcState )
+{
+ /* Walk dest's out transitions and src's state conds side by side. */
+ PairIter<TransAp, StateCond> transCond( destState->outList.head,
+ srcState->stateCondList.head );
+ for ( ; !transCond.end(); transCond++ ) {
+ if ( transCond.userState == RangeOverlap ) {
+ /* The expansion covers only the overlapping piece of the range. */
+ Expansion *expansion = new Expansion( transCond.s1Tel.lowKey,
+ transCond.s1Tel.highKey );
+ /* Copy the transition. The copy constructor resets the state
+ * pointers, so the target is restored by hand. */
+ expansion->fromTrans = new TransAp(*transCond.s1Tel.trans);
+ expansion->fromTrans->fromState = 0;
+ expansion->fromTrans->toState = transCond.s1Tel.trans->toState;
+ expansion->fromCondSpace = 0;
+ expansion->fromVals = 0;
+ CondSpace *srcCS = transCond.s2Tel.trans->condSpace;
+ expansion->toCondSpace = srcCS;
+
+ /* One target value per bit combination of the src condition set. */
+ long numTargVals = (1 << srcCS->condSet.length());
+ for ( long targVals = 0; targVals < numTargVals; targVals++ )
+ expansion->toValsList.append( targVals );
+
+ #ifdef LOG_CONDS
+ logNewExpansion( expansion );
+ #endif
+ expansionList.append( expansion );
+ }
+ }
+}
+
+/* Find the out transitions of state that overlap the key range
+ * [lowKey, highKey] as encoded in fromCondSpace for the condition values
+ * fromVals. For each overlap an Expansion from (fromCondSpace, fromVals) to
+ * (toCondSpace, toValsList) is appended to expansionList, carrying a private
+ * copy of the overlapping transition. */
+void FsmAp::findCondExpInTrans( ExpansionList &expansionList, StateAp *state,
+ Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace,
+ long fromVals, LongVect &toValsList )
+{
+ /* Build a stand-alone transition that spans the encoded key range. It is
+ * used only as the second list of the pair iteration below. */
+ TransAp searchTrans;
+ searchTrans.lowKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() +
+ (lowKey - keyOps->minKey);
+ searchTrans.highKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() +
+ (highKey - keyOps->minKey);
+ searchTrans.prev = searchTrans.next = 0;
+
+ PairIter<TransAp> pairIter( state->outList.head, &searchTrans );
+ for ( ; !pairIter.end(); pairIter++ ) {
+ if ( pairIter.userState == RangeOverlap ) {
+ /* Note the expansion records the plain (unencoded) key range. */
+ Expansion *expansion = new Expansion( lowKey, highKey );
+ /* Copy the transition. The copy constructor resets the state
+ * pointers, so the target is restored by hand. */
+ expansion->fromTrans = new TransAp(*pairIter.s1Tel.trans);
+ expansion->fromTrans->fromState = 0;
+ expansion->fromTrans->toState = pairIter.s1Tel.trans->toState;
+ expansion->fromCondSpace = fromCondSpace;
+ expansion->fromVals = fromVals;
+ expansion->toCondSpace = toCondSpace;
+ expansion->toValsList = toValsList;
+
+ expansionList.append( expansion );
+ #ifdef LOG_CONDS
+ logNewExpansion( expansion );
+ #endif
+ }
+ }
+}
+
+/* Find expansions needed where state condition ranges of destState and
+ * srcState overlap and src tests conditions that dest does not. For every
+ * value combination of dest's conditions, the matching transitions in
+ * destState are expanded to all value combinations of the merged condition
+ * set that agree with it. Results are appended to expansionList. */
+void FsmAp::findCondExpansions( ExpansionList &expansionList,
+ StateAp *destState, StateAp *srcState )
+{
+ PairIter<StateCond, StateCond> condCond( destState->stateCondList.head,
+ srcState->stateCondList.head );
+ for ( ; !condCond.end(); condCond++ ) {
+ if ( condCond.userState == RangeOverlap ) {
+ /* Loop over all existing condVals . */
+ CondSet &destCS = condCond.s1Tel.trans->condSpace->condSet;
+ long destLen = destCS.length();
+
+ /* Find the items in src cond set that are not in dest
+ * cond set. These are the items that we must expand. */
+ CondSet srcOnlyCS = condCond.s2Tel.trans->condSpace->condSet;
+ for ( CondSet::Iter dcsi = destCS; dcsi.lte(); dcsi++ )
+ srcOnlyCS.remove( *dcsi );
+ long srcOnlyLen = srcOnlyCS.length();
+
+ /* Nothing to expand when src adds no new conditions. */
+ if ( srcOnlyCS.length() > 0 ) {
+ #ifdef LOG_CONDS
+ cerr << "there are " << srcOnlyCS.length() << " item(s) that are "
+ "only in the srcCS" << endl;
+ #endif
+
+ /* The target space is the union of both condition sets. */
+ CondSet mergedCS = destCS;
+ mergedCS.insert( condCond.s2Tel.trans->condSpace->condSet );
+
+ CondSpace *fromCondSpace = addCondSpace( destCS );
+ CondSpace *toCondSpace = addCondSpace( mergedCS );
+
+ /* Loop all values in the dest space. */
+ for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) {
+ /* Translate the dest bit positions into bit positions of the
+ * merged set. */
+ long basicVals = 0;
+ for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) {
+ if ( destVals & (1 << csi.pos()) ) {
+ Action **cim = mergedCS.find( *csi );
+ long bitPos = (cim - mergedCS.data);
+ basicVals |= 1 << bitPos;
+ }
+ }
+
+ /* Loop all new values. */
+ LongVect expandToVals;
+ for ( long soVals = 0; soVals < (1 << srcOnlyLen); soVals++ ) {
+ /* Add the src-only bits, again translated into the merged
+ * set's bit positions. */
+ long targVals = basicVals;
+ for ( CondSet::Iter csi = srcOnlyCS; csi.lte(); csi++ ) {
+ if ( soVals & (1 << csi.pos()) ) {
+ Action **cim = mergedCS.find( *csi );
+ long bitPos = (cim - mergedCS.data);
+ targVals |= 1 << bitPos;
+ }
+ }
+ expandToVals.append( targVals );
+ }
+
+ /* Record expansions for the transitions carrying destVals. */
+ findCondExpInTrans( expansionList, destState,
+ condCond.s1Tel.lowKey, condCond.s1Tel.highKey,
+ fromCondSpace, toCondSpace, destVals, expandToVals );
+ }
+ }
+ }
+ }
+}
+
+/* Apply a list of expansions to destState. For every target value of every
+ * expansion, the expansion's transition copy is re-keyed into the target
+ * condition space and copied into destState via outTransCopy(). */
+void FsmAp::doExpand( MergeData &md, StateAp *destState, ExpansionList &expList1 )
+{
+ for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) {
+ for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ ) {
+ long targVals = *to;
+
+ /* We will use the copy of the transition that was made when the
+ * expansion was created. It will get used multiple times. Each
+ * time we must set up the keys, everything else is constant and
+ * already prepared. */
+ TransAp *srcTrans = exp->fromTrans;
+
+ /* Encoded key = base of the target space + one alphabet-sized block
+ * per value + offset of the key within the alphabet. */
+ srcTrans->lowKey = exp->toCondSpace->baseKey +
+ targVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey);
+ srcTrans->highKey = exp->toCondSpace->baseKey +
+ targVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey);
+
+ /* Wrap the single transition in a temporary list for the copy, then
+ * abandon the list so the transition stays owned by the expansion. */
+ TransList srcList;
+ srcList.append( srcTrans );
+ outTransCopy( md, destState, srcList.head );
+ srcList.abandon();
+ }
+ }
+}
+
+
+/* Remove from destState the transitions that were the source of each
+ * expansion in expList1. The range to remove is the plain key range when the
+ * expansion has no source condition space, otherwise the range encoded in
+ * that space for the expansion's source values. The out list is rebuilt once
+ * per expansion. */
+void FsmAp::doRemove( MergeData &md, StateAp *destState, ExpansionList &expList1 )
+{
+ for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) {
+ /* Describe the range to be removed as a one-element list. */
+ Removal removal;
+ if ( exp->fromCondSpace == 0 ) {
+ removal.lowKey = exp->lowKey;
+ removal.highKey = exp->highKey;
+ }
+ else {
+ removal.lowKey = exp->fromCondSpace->baseKey +
+ exp->fromVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey);
+ removal.highKey = exp->fromCondSpace->baseKey +
+ exp->fromVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey);
+ }
+ removal.next = 0;
+
+ /* Surviving transitions are collected here and swapped in at the end. */
+ TransList destList;
+ PairIter<TransAp, Removal> pairIter( destState->outList.head, &removal );
+ for ( ; !pairIter.end(); pairIter++ ) {
+ switch ( pairIter.userState ) {
+ case RangeInS1: {
+ /* Outside the removal range: keep, with the keys the iterator
+ * reports for this piece. */
+ TransAp *destTrans = pairIter.s1Tel.trans;
+ destTrans->lowKey = pairIter.s1Tel.lowKey;
+ destTrans->highKey = pairIter.s1Tel.highKey;
+ destList.append( destTrans );
+ break;
+ }
+ case RangeInS2:
+ /* Removal range with no transition under it: nothing to do. */
+ break;
+ case RangeOverlap: {
+ /* Inside the removal range: detach from the graph and free. */
+ TransAp *trans = pairIter.s1Tel.trans;
+ detachTrans( trans->fromState, trans->toState, trans );
+ delete trans;
+ break;
+ }
+ case BreakS1: {
+ /* A dest transition is being split: continue with a duplicate
+ * for the remaining piece. */
+ pairIter.s1Tel.trans = dupTrans( destState,
+ pairIter.s1Tel.trans );
+ break;
+ }
+ case BreakS2:
+ break;
+ }
+ }
+ destState->outList.transfer( destList );
+ }
+}
+
+/* Merge the state condition list of srcState into that of destState. Ranges
+ * only in dest are kept, ranges only in src are copied, and overlapping
+ * ranges get the condition space of the union of both condition sets.
+ * srcState's list is only read. */
+void FsmAp::mergeStateConds( StateAp *destState, StateAp *srcState )
+{
+ /* The merged list is built here and swapped into destState at the end. */
+ StateCondList destList;
+ PairIter<StateCond> pairIter( destState->stateCondList.head,
+ srcState->stateCondList.head );
+ for ( ; !pairIter.end(); pairIter++ ) {
+ switch ( pairIter.userState ) {
+ case RangeInS1: {
+ /* Only in dest: reuse the element with the iterator's keys. */
+ StateCond *destCond = pairIter.s1Tel.trans;
+ destCond->lowKey = pairIter.s1Tel.lowKey;
+ destCond->highKey = pairIter.s1Tel.highKey;
+ destList.append( destCond );
+ break;
+ }
+ case RangeInS2: {
+ /* Only in src: add a copy so that src is left untouched. */
+ StateCond *newCond = new StateCond( *pairIter.s2Tel.trans );
+ newCond->lowKey = pairIter.s2Tel.lowKey;
+ newCond->highKey = pairIter.s2Tel.highKey;
+ destList.append( newCond );
+ break;
+ }
+ case RangeOverlap: {
+ /* In both: dest's element now tests the union of the conditions. */
+ StateCond *destCond = pairIter.s1Tel.trans;
+ StateCond *srcCond = pairIter.s2Tel.trans;
+ CondSet mergedCondSet;
+ mergedCondSet.insert( destCond->condSpace->condSet );
+ mergedCondSet.insert( srcCond->condSpace->condSet );
+ destCond->condSpace = addCondSpace( mergedCondSet );
+
+ destCond->lowKey = pairIter.s1Tel.lowKey;
+ destCond->highKey = pairIter.s1Tel.highKey;
+ destList.append( destCond );
+ break;
+ }
+ case BreakS1:
+ /* A dest element is being split: continue with a copy for the
+ * remaining piece. */
+ pairIter.s1Tel.trans = new StateCond( *pairIter.s1Tel.trans );
+ break;
+
+ case BreakS2:
+ break;
+ }
+ }
+ destState->stateCondList.transfer( destList );
+}
+
+/* A state merge that represents the drawing in of leaving transitions. When
+ * destState carries out data, the source state is first duplicated into a
+ * scratch state, the out data is transferred onto the duplicate, and only
+ * then is the duplicate merged in. The scratch state is expected to be reaped
+ * later because it is never given any in transitions. */
+void FsmAp::mergeStatesLeaving( MergeData &md, StateAp *destState, StateAp *srcState )
+{
+    /* Without out data this is just an ordinary merge. */
+    if ( !hasOutData( destState ) ) {
+        mergeStates( md, destState, srcState );
+        return;
+    }
+
+    /* Build a mutable stand-in for the source state. */
+    StateAp *scratch = addState();
+    mergeStates( md, scratch, srcState );
+
+    /* Apply dest's out data and out conditions to the stand-in. */
+    transferOutData( scratch, destState );
+    ActionSet::Iter outCond = destState->outCondSet;
+    while ( outCond.lte() ) {
+        embedCondition( md, scratch, *outCond );
+        outCond++;
+    }
+
+    /* Finally draw the prepared stand-in into the destination. */
+    mergeStates( md, destState, scratch );
+}
+
+/* Merge each of the numSrc states in the srcStates array into destState, in
+ * array order. */
+void FsmAp::mergeStates( MergeData &md, StateAp *destState,
+        StateAp **srcStates, int numSrc )
+{
+    StateAp **cur = srcStates;
+    StateAp **const stop = srcStates + numSrc;
+    while ( cur < stop ) {
+        mergeStates( md, destState, *cur );
+        cur++;
+    }
+}
+
+/* Merge srcState into destState: condition ranges, out transitions, state
+ * bits, final status, epsilon transitions, priorities and action tables.
+ * srcState may be the same object as destState; that case is handled by
+ * working from temporary copies of the source tables. */
+void FsmAp::mergeStates( MergeData &md, StateAp *destState, StateAp *srcState )
+{
+ /* expList1 holds expansions of dest's transitions caused by src's
+ * conditions, expList2 the reverse direction. */
+ ExpansionList expList1;
+ ExpansionList expList2;
+
+ /* All expansions are computed up front, before either state's condition
+ * list or transitions are modified. */
+ findTransExpansions( expList1, destState, srcState );
+ findCondExpansions( expList1, destState, srcState );
+ findTransExpansions( expList2, srcState, destState );
+ findCondExpansions( expList2, srcState, destState );
+
+ mergeStateConds( destState, srcState );
+
+ outTransCopy( md, destState, srcState->outList.head );
+
+ /* Add the expanded transitions, then take out the ones they replace. */
+ doExpand( md, destState, expList1 );
+ doExpand( md, destState, expList2 );
+
+ doRemove( md, destState, expList1 );
+ doRemove( md, destState, expList2 );
+
+ expList1.empty();
+ expList2.empty();
+
+ /* Get its bits and final state status. */
+ destState->stateBits |= ( srcState->stateBits & ~SB_ISFINAL );
+ if ( srcState->isFinState() )
+ setFinState( destState );
+
+ /* Draw in any properties of srcState into destState. */
+ if ( srcState == destState ) {
+ /* Duplicate the list to protect against write to source. The
+ * priorities sets are not copied in because that would have no
+ * effect. */
+ destState->epsilonTrans.append( EpsilonTrans( srcState->epsilonTrans ) );
+
+ /* Get all actions, duplicating to protect against write to source. */
+ destState->toStateActionTable.setActions(
+ ActionTable( srcState->toStateActionTable ) );
+ destState->fromStateActionTable.setActions(
+ ActionTable( srcState->fromStateActionTable ) );
+ destState->outActionTable.setActions( ActionTable( srcState->outActionTable ) );
+ destState->outCondSet.insert( ActionSet( srcState->outCondSet ) );
+ destState->errActionTable.setActions( ErrActionTable( srcState->errActionTable ) );
+ destState->eofActionTable.setActions( ActionTable( srcState->eofActionTable ) );
+ }
+ else {
+ /* Get the epsilons, out priorities. */
+ destState->epsilonTrans.append( srcState->epsilonTrans );
+ destState->outPriorTable.setPriors( srcState->outPriorTable );
+
+ /* Get all actions. */
+ destState->toStateActionTable.setActions( srcState->toStateActionTable );
+ destState->fromStateActionTable.setActions( srcState->fromStateActionTable );
+ destState->outActionTable.setActions( srcState->outActionTable );
+ destState->outCondSet.insert( srcState->outCondSet );
+ destState->errActionTable.setActions( srcState->errActionTable );
+ destState->eofActionTable.setActions( srcState->eofActionTable );
+ }
+}
+
+/* Complete every state on md's fill list by merging in the set of states it
+ * stands for, then release the state dict elements of those states. */
+void FsmAp::fillInStates( MergeData &md )
+{
+    /* Pass one: do the pending merges. This will likely put further states
+     * on the fill list; the next pointer is read only after each merge. */
+    for ( StateAp *pending = md.stfillHead; pending != 0;
+            pending = pending->alg.next )
+    {
+        StateSet &sources = pending->stateDictEl->stateSet;
+        mergeStates( md, pending, sources.data, sources.length() );
+    }
+
+    /* Pass two: delete and reset the dict element of every state on the
+     * fill list. */
+    for ( StateAp *filled = md.stfillHead; filled != 0;
+            filled = filled->alg.next )
+    {
+        delete filled->stateDictEl;
+        filled->stateDictEl = 0;
+    }
+
+    /* StateDict will still have its ptrs/size set but all of its elements
+     * have been deleted, so there is nothing left to clean up. */
+}
+
+/* Work out what is needed to embed condAction as a condition on all out
+ * transitions of destState. The state condition list of destState is rebuilt
+ * so that every range includes condAction, and the expansions that move the
+ * existing transitions to the "condAction is true" values are appended to
+ * expansionList. */
+void FsmAp::findEmbedExpansions( ExpansionList &expansionList,
+ StateAp *destState, Action *condAction )
+{
+ /* The new state condition list, swapped into destState at the end. */
+ StateCondList destList;
+ PairIter<TransAp, StateCond> transCond( destState->outList.head,
+ destState->stateCondList.head );
+ for ( ; !transCond.end(); transCond++ ) {
+ switch ( transCond.userState ) {
+ case RangeInS1: {
+ /* A transition range with no state cond over it. Only ranges that
+ * start at or below maxKey are handled here. */
+ if ( transCond.s1Tel.lowKey <= keyOps->maxKey ) {
+ assert( transCond.s1Tel.highKey <= keyOps->maxKey );
+
+ /* Make a new state cond. */
+ StateCond *newStateCond = new StateCond( transCond.s1Tel.lowKey,
+ transCond.s1Tel.highKey );
+ newStateCond->condSpace = addCondSpace( CondSet( condAction ) );
+ destList.append( newStateCond );
+
+ /* Create the expansion. */
+ Expansion *expansion = new Expansion( transCond.s1Tel.lowKey,
+ transCond.s1Tel.highKey );
+ expansion->fromTrans = new TransAp(*transCond.s1Tel.trans);
+ expansion->fromTrans->fromState = 0;
+ expansion->fromTrans->toState = transCond.s1Tel.trans->toState;
+ expansion->fromCondSpace = 0;
+ expansion->fromVals = 0;
+ expansion->toCondSpace = newStateCond->condSpace;
+ /* Single condition, single target value: condition set. */
+ expansion->toValsList.append( 1 );
+ #ifdef LOG_CONDS
+ logNewExpansion( expansion );
+ #endif
+ expansionList.append( expansion );
+ }
+ break;
+ }
+ case RangeInS2: {
+ /* Enhance state cond and find the expansion. */
+ StateCond *stateCond = transCond.s2Tel.trans;
+ stateCond->lowKey = transCond.s2Tel.lowKey;
+ stateCond->highKey = transCond.s2Tel.highKey;
+
+ /* Remember the old space before switching the state cond over. */
+ CondSet &destCS = stateCond->condSpace->condSet;
+ long destLen = destCS.length();
+ CondSpace *fromCondSpace = stateCond->condSpace;
+
+ CondSet mergedCS = destCS;
+ mergedCS.insert( condAction );
+ CondSpace *toCondSpace = addCondSpace( mergedCS );
+ stateCond->condSpace = toCondSpace;
+ destList.append( stateCond );
+
+ /* Loop all values in the dest space. */
+ for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) {
+ /* Translate the old bit positions into those of the merged set. */
+ long basicVals = 0;
+ for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) {
+ if ( destVals & (1 << csi.pos()) ) {
+ Action **cim = mergedCS.find( *csi );
+ long bitPos = (cim - mergedCS.data);
+ basicVals |= 1 << bitPos;
+ }
+ }
+
+ /* Turn on the bit of the newly embedded condition. */
+ long targVals = basicVals;
+ Action **cim = mergedCS.find( condAction );
+ long bitPos = (cim - mergedCS.data);
+ targVals |= 1 << bitPos;
+
+ LongVect expandToVals( targVals );
+ findCondExpInTrans( expansionList, destState,
+ transCond.s2Tel.lowKey, transCond.s2Tel.highKey,
+ fromCondSpace, toCondSpace, destVals, expandToVals );
+ }
+ break;
+ }
+
+
+ /* The remaining pair states are not expected for this pairing. */
+ case RangeOverlap:
+ case BreakS1:
+ case BreakS2:
+ assert( false );
+ break;
+ }
+ }
+
+ destState->stateCondList.transfer( destList );
+}
+
+/* Embed condAction as a condition on the out transitions of state. This is
+ * the self-contained entry point: it provides the merge data, runs the
+ * worker overload, fills in any states created along the way and removes
+ * misfits afterwards. */
+void FsmAp::embedCondition( StateAp *state, Action *condAction )
+{
+    /* For the merging process. */
+    MergeData md;
+
+    /* Turn on misfit accounting so that removeMisfits() below can clean up
+     * after the embedding. */
+    setMisfitAccounting( true );
+
+    /* Worker. */
+    embedCondition( md, state, condAction );
+
+    /* Fill in any states that were newed up as combinations of others. */
+    fillInStates( md );
+
+    /* Remove the misfits and turn off misfit accounting. */
+    removeMisfits();
+    setMisfitAccounting( false );
+}
+
+/* Worker for condition embedding. Uses the caller's merge data and leaves
+ * fill-in of new states and misfit removal to the caller. */
+void FsmAp::embedCondition( MergeData &md, StateAp *state, Action *condAction )
+{
+ ExpansionList expList;
+
+ /* Find what must change, add the expanded transitions, remove the ones
+ * they replace, then free the expansions. */
+ findEmbedExpansions( expList, state, condAction );
+ doExpand( md, state, expList );
+ doRemove( md, state, expList );
+ expList.empty();
+}
diff --git a/ragel/fsmgraph.h b/ragel/fsmgraph.h
new file mode 100644
index 0000000..1a8e80c
--- /dev/null
+++ b/ragel/fsmgraph.h
@@ -0,0 +1,1369 @@
+/*
+ * Copyright 2001-2004 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _FSMGRAPH_H
+#define _FSMGRAPH_H
+
+#include <assert.h>
+#include "common.h"
+#include "vector.h"
+#include "bstset.h"
+#include "compare.h"
+#include "avltree.h"
+#include "dlist.h"
+#include "bstmap.h"
+#include "sbstmap.h"
+#include "sbstset.h"
+#include "sbsttable.h"
+#include "avlset.h"
+#include "avlmap.h"
+
+//#define LOG_CONDS
+
+/* Flags that control merging. */
+#define SB_GRAPH1 0x01
+#define SB_GRAPH2 0x02
+#define SB_BOTH 0x03
+#define SB_ISFINAL 0x04
+#define SB_ISMARKED 0x08
+
+struct TransAp;
+struct StateAp;
+struct FsmAp;
+struct Action;
+struct LongestMatchPart;
+
+/* State list element for unambiguous access to list element. */
+struct FsmListEl
+{
+ /* Neighbouring states in the list. */
+ StateAp *prev, *next;
+};
+
+/* This is the marked index for a state pair. Used in minimization. It keeps
+ * track of whether or not the state pair is marked. */
+struct MarkIndex
+{
+ /* Construct for the given number of states. */
+ MarkIndex(int states);
+ ~MarkIndex();
+
+ /* Mark a pair of states, query whether a pair has been marked. */
+ void markPair(int state1, int state2);
+ bool isPairMarked(int state1, int state2);
+
+private:
+ int numStates;
+ /* Mark storage; layout is defined by the implementation elsewhere. */
+ bool *array;
+};
+
+extern KeyOps *keyOps;
+
+/* Transition Action Element. */
+typedef SBstMapEl< int, Action* > ActionTableEl;
+
+/* Transition Action Table. Maps an int ordering to an Action pointer. */
+struct ActionTable
+ : public SBstMap< int, Action*, CmpOrd<int> >
+{
+ /* Add a single action, parallel arrays of nActs actions, or all actions
+ * of another table. */
+ void setAction( int ordering, Action *action );
+ void setActions( int *orderings, Action **actions, int nActs );
+ void setActions( const ActionTable &other );
+
+ bool hasAction( Action *action );
+};
+
+typedef SBstSet< Action*, CmpOrd<Action*> > ActionSet;
+typedef CmpSTable< Action*, CmpOrd<Action*> > CmpActionSet;
+
+/* Longest-match Transition Action Element. */
+typedef SBstMapEl< int, LongestMatchPart* > LmActionTableEl;
+
+/* Longest-match Transition Action Table. Maps an int ordering to a
+ * LongestMatchPart pointer. */
+struct LmActionTable
+ : public SBstMap< int, LongestMatchPart*, CmpOrd<int> >
+{
+ /* Add a single item, or all items of another table. */
+ void setAction( int ordering, LongestMatchPart *action );
+ void setActions( const LmActionTable &other );
+};
+
+/* Three-way compare of whole action table elements: by key first, ties
+ * broken on the value (action pointer). */
+struct CmpActionTableEl
+{
+    static int compare( const ActionTableEl &action1,
+            const ActionTableEl &action2 )
+    {
+        /* Each difference of comparisons is -1, 0 or 1. */
+        const int byKey = ( action1.key > action2.key ) -
+                ( action1.key < action2.key );
+        if ( byKey != 0 )
+            return byKey;
+        return ( action1.value > action2.value ) -
+                ( action1.value < action2.value );
+    }
+};
+
+/* Compare for ActionTable. */
+typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable;
+
+/* Three-way compare of whole lm action table elements: by key first, ties
+ * broken on the value (longest match part pointer). */
+struct CmpLmActionTableEl
+{
+    static int compare( const LmActionTableEl &lmAction1,
+            const LmActionTableEl &lmAction2 )
+    {
+        /* Each difference of comparisons is -1, 0 or 1. */
+        const int byKey = ( lmAction1.key > lmAction2.key ) -
+                ( lmAction1.key < lmAction2.key );
+        if ( byKey != 0 )
+            return byKey;
+        return ( lmAction1.value > lmAction2.value ) -
+                ( lmAction1.value < lmAction2.value );
+    }
+};
+
+/* Compare for LmActionTable. */
+typedef CmpSTable< LmActionTableEl, CmpLmActionTableEl > CmpLmActionTable;
+
+/* Action table element for error action tables. Adds the encoding of transfer
+ * point. */
+struct ErrActionTableEl
+{
+ /* Note the argument order (action first) differs from the member order. */
+ ErrActionTableEl( Action *action, int ordering, int transferPoint )
+ : ordering(ordering), action(action), transferPoint(transferPoint) { }
+
+ /* Ordering and id of the action embedding. */
+ int ordering;
+ Action *action;
+
+ /* Id of point of transfer from Error action table to transitions and
+ * eofActionTable. */
+ int transferPoint;
+
+ /* The table is keyed on the ordering. */
+ int getKey() const { return ordering; }
+};
+
+/* Error action table: ErrActionTableEl items keyed by their int ordering. */
+struct ErrActionTable
+ : public SBstTable< ErrActionTableEl, int, CmpOrd<int> >
+{
+ /* Add a single error action, or all error actions of another table. */
+ void setAction( int ordering, Action *action, int transferPoint );
+ void setActions( const ErrActionTable &other );
+};
+
+/* Compare of an error action table element (key & value). */
+struct CmpErrActionTableEl
+{
+ static int compare( const ErrActionTableEl &action1,
+ const ErrActionTableEl &action2 )
+ {
+ if ( action1.ordering < action2.ordering )
+ return -1;
+ else if ( action1.ordering > action2.ordering )
+ return 1;
+ else if ( action1.action < action2.action )
+ return -1;
+ else if ( action1.action > action2.action )
+ return 1;
+ else if ( action1.transferPoint < action2.transferPoint )
+ return -1;
+ else if ( action1.transferPoint > action2.transferPoint )
+ return 1;
+ return 0;
+ }
+};
+
+/* Compare for ErrActionTable. */
+typedef CmpSTable< ErrActionTableEl, CmpErrActionTableEl > CmpErrActionTable;
+
+
+/* Describe a priority, shared among PriorEls.
+ * Holds the priority key and the priority value. */
+struct PriorDesc
+{
+ int key;
+ int priority;
+};
+
+/* Element in the arrays of priorities for transitions and arrays. Ordering is
+ * unique among instantiations of machines, desc is shared. */
+struct PriorEl
+{
+ PriorEl( int ordering, PriorDesc *desc )
+ : ordering(ordering), desc(desc) { }
+
+ int ordering;
+ /* Shared descriptor; nothing in this struct frees it. */
+ PriorDesc *desc;
+};
+
+/* Three-way compare of priority elements. Only the key of the priority
+ * descriptor takes part in the ordering. */
+struct PriorElCmp
+{
+    static inline int compare( const PriorEl &pel1, const PriorEl &pel2 )
+    {
+        const int key1 = pel1.desc->key;
+        const int key2 = pel2.desc->key;
+        /* Difference of comparisons is -1, 0 or 1. */
+        return ( key1 > key2 ) - ( key1 < key2 );
+    }
+};
+
+
+/* Priority Table. A set of PriorEl ordered by descriptor key (PriorElCmp). */
+struct PriorTable
+ : public SBstSet< PriorEl, PriorElCmp >
+{
+ /* Set a single priority, or all priorities of another table. */
+ void setPrior( int ordering, PriorDesc *desc );
+ void setPriors( const PriorTable &other );
+};
+
+/* Three-way compare of prior table elements for distinguishing state data:
+ * by descriptor pointer first, ties broken on the ordering. */
+struct CmpPriorEl
+{
+    static inline int compare( const PriorEl &pel1, const PriorEl &pel2 )
+    {
+        /* Each difference of comparisons is -1, 0 or 1. */
+        const int byDesc = ( pel1.desc > pel2.desc ) -
+                ( pel1.desc < pel2.desc );
+        if ( byDesc != 0 )
+            return byDesc;
+        return ( pel1.ordering > pel2.ordering ) -
+                ( pel1.ordering < pel2.ordering );
+    }
+};
+
+/* Compare of PriorTable distinguishing state data. Using a compare of the
+ * pointers is a little more strict than it needs be. It requires that
+ * priority tables have the exact same set of priority assignment operators
+ * (from the input lang) to be considered equal.
+ *
+ * Really only key-value pairs need be tested and ordering be merged. However
+ * this would require that in the fusing of states, priority descriptors be
+ * chosen for the new fused state based on priority. Since the out transition
+ * lists and ranges aren't necessarily going to line up, this is more work for
+ * little gain. Final compression resets all priorities first, so this would
+ * only be useful for compression at every operator, which is only an
+ * undocumented test feature.
+ */
+typedef CmpSTable<PriorEl, CmpPriorEl> CmpPriorTable;
+
+/* Plain action list that imposes no ordering. */
+typedef Vector<int> TransFuncList;
+
+/* Comparison for TransFuncList. */
+typedef CmpTable< int, CmpOrd<int> > TransFuncListCompare;
+
+/* Transition class that implements actions and priorities. */
+struct TransAp
+{
+ /* Neither constructor initializes the list link pointers (prev, next,
+ * ilprev, ilnext); they must be set before they are read. */
+ TransAp() : fromState(0), toState(0) {}
+ /* Copies the key range, action table and priority table. The state
+ * pointers are reset to 0 and the longest-match table is not copied; it
+ * must be empty in both objects. */
+ TransAp( const TransAp &other ) :
+ lowKey(other.lowKey),
+ highKey(other.highKey),
+ fromState(0), toState(0),
+ actionTable(other.actionTable),
+ priorTable(other.priorTable)
+ {
+ assert( lmActionTable.length() == 0 && other.lmActionTable.length() == 0 );
+ }
+
+ /* Key range of the transition, and the states it connects. */
+ Key lowKey, highKey;
+ StateAp *fromState;
+ StateAp *toState;
+
+ /* Pointers for outlist. */
+ TransAp *prev, *next;
+
+ /* Pointers for in-list. */
+ TransAp *ilprev, *ilnext;
+
+ /* The function table and priority for the transition. */
+ ActionTable actionTable;
+ PriorTable priorTable;
+
+ LmActionTable lmActionTable;
+};
+
+/* In transition list. Like DList except only has head pointers, which is all
+ * that is required. Insertion and deletion is handled by the graph. This
+ * class provides the iterator of a single list. */
+struct TransInList
+{
+ TransInList() : head(0) { }
+
+ /* First transition of the list; linked through ilprev/ilnext. */
+ TransAp *head;
+
+ struct Iter
+ {
+ /* Default construct. */
+ Iter() : ptr(0) { }
+
+ /* Construct, assign from a list. */
+ Iter( const TransInList &il ) : ptr(il.head) { }
+ Iter &operator=( const TransInList &dl ) { ptr = dl.head; return *this; }
+
+ /* lte: still on an element. end: ran off the list. */
+ bool lte() const { return ptr != 0; }
+ bool end() const { return ptr == 0; }
+
+ /* At the first, last element. */
+ bool first() const { return ptr && ptr->ilprev == 0; }
+ bool last() const { return ptr && ptr->ilnext == 0; }
+
+ /* Cast, dereference, arrow ops. */
+ operator TransAp*() const { return ptr; }
+ TransAp &operator *() const { return *ptr; }
+ TransAp *operator->() const { return ptr; }
+
+ /* Increment, decrement. Must not be used once the iterator is at end. */
+ inline void operator++(int) { ptr = ptr->ilnext; }
+ inline void operator--(int) { ptr = ptr->ilprev; }
+
+ /* The iterator is simply a pointer. */
+ TransAp *ptr;
+ };
+};
+
+typedef DList<TransAp> TransList;
+
+/* Set of states, list of states. */
+typedef BstSet<StateAp*> StateSet;
+typedef DList<StateAp> StateList;
+
+/* An element in a state dict: the set of states being combined (the key)
+ * and the state that represents the combination. */
+struct StateDictEl
+:
+    public AvlTreeEl<StateDictEl>
+{
+    /* targState starts out null rather than indeterminate; it is filled in
+     * by whoever creates the element. */
+    StateDictEl(const StateSet &stateSet)
+        : stateSet(stateSet), targState(0) { }
+
+    /* Key accessor used by the dictionary. */
+    const StateSet &getKey() { return stateSet; }
+    StateSet stateSet;
+    StateAp *targState;
+};
+
+/* Dictionary mapping a set of states to a target state. */
+typedef AvlTree< StateDictEl, StateSet, CmpTable<StateAp*> > StateDict;
+
+/* Data needed for a merge operation. */
+struct MergeData
+{
+ MergeData()
+ : stfillHead(0), stfillTail(0) { }
+
+ /* Maps sets of states to the state created to represent them. */
+ StateDict stateDict;
+
+ /* List of states awaiting fill-in, linked through StateAp::alg.next. */
+ StateAp *stfillHead;
+ StateAp *stfillTail;
+
+ void fillListAppend( StateAp *state );
+};
+
+/* A key range paired with a transition pointer. */
+struct TransEl
+{
+ /* Constructors. The default constructor initializes nothing and the
+ * two-key form leaves value unset. */
+ TransEl() { }
+ TransEl( Key lowKey, Key highKey )
+ : lowKey(lowKey), highKey(highKey) { }
+ TransEl( Key lowKey, Key highKey, TransAp *value )
+ : lowKey(lowKey), highKey(highKey), value(value) { }
+
+ Key lowKey, highKey;
+ TransAp *value;
+};
+
+/* Three-way compare of keys: -1, 0 or 1. */
+struct CmpKey
+{
+    static int compare( const Key key1, const Key key2 )
+    {
+        if ( key1 < key2 )
+            return -1;
+        return ( key1 > key2 ) ? 1 : 0;
+    }
+};
+
+/* Vector based set of key items. */
+typedef BstSet<Key, CmpKey> KeySet;
+
+/* A partition of states used by minimization (see StateAp::alg.partition). */
+struct MinPartition
+{
+ MinPartition() : active(false) { }
+
+ /* The states in this partition. */
+ StateList list;
+ bool active;
+
+ /* List links; not initialized by the constructor. */
+ MinPartition *prev, *next;
+};
+
+/* Epsilon transition stored in a state. Specifies the target */
+typedef Vector<int> EpsilonTrans;
+
+/* List of states that are to be drawn into this. */
+struct EptVectEl
+{
+ EptVectEl( StateAp *targ, bool leaving )
+ : targ(targ), leaving(leaving) { }
+
+ /* State to merge with, and whether the merge is a leaving one
+ * (presumably selects mergeStatesLeaving -- confirm against the user). */
+ StateAp *targ;
+ bool leaving;
+};
+typedef Vector<EptVectEl> EptVect;
+
+/* Set of entry ids that go into this state. */
+typedef BstSet<int> EntryIdSet;
+
+/* Set of longest match items that may be active in a given state. */
+typedef BstSet<LongestMatchPart*> LmItemSet;
+
+/* Conditions. */
+typedef BstSet< Action*, CmpOrd<Action*> > CondSet;
+typedef CmpTable< Action*, CmpOrd<Action*> > CmpCondSet;
+
+/* A condition space: a set of condition actions together with the base key
+ * of the key range its encoded transitions occupy. Stored in a tree keyed by
+ * the condition set. */
+struct CondSpace
+ : public AvlTreeEl<CondSpace>
+{
+ /* baseKey and condSpaceId are not set here; the creator assigns them. */
+ CondSpace( const CondSet &condSet )
+ : condSet(condSet) {}
+
+ const CondSet &getKey() { return condSet; }
+
+ CondSet condSet;
+ Key baseKey;
+ long condSpaceId;
+};
+
+typedef Vector<CondSpace*> CondSpaceVect;
+
+typedef AvlTree<CondSpace, CondSet, CmpCondSet> CondSpaceMap;
+
+/* A key range of a state that is subject to a condition space. */
+struct StateCond
+{
+ /* condSpace and the list links are not set here. */
+ StateCond( Key lowKey, Key highKey ) :
+ lowKey(lowKey), highKey(highKey) {}
+
+ Key lowKey;
+ Key highKey;
+ CondSpace *condSpace;
+
+ StateCond *prev, *next;
+};
+
+typedef DList<StateCond> StateCondList;
+typedef Vector<long> LongVect;
+
+/* Describes the expansion of a transition over a key range from one
+ * condition space and value into a target condition space and a list of
+ * target values. Owns the transition copy in fromTrans. */
+struct Expansion
+{
+    /* All scalar and pointer members start out zeroed, so a partially
+     * filled expansion can be logged or destroyed safely. */
+    Expansion( Key lowKey, Key highKey ) :
+        lowKey(lowKey), highKey(highKey),
+        fromTrans(0), fromCondSpace(0),
+        fromVals(0),
+        toCondSpace(0),
+        prev(0), next(0) {}
+
+    ~Expansion()
+    {
+        /* Deleting a null pointer is a no-op, no check needed. */
+        delete fromTrans;
+    }
+
+    /* Plain (unencoded) key range being expanded. */
+    Key lowKey;
+    Key highKey;
+
+    /* Private copy of the source transition, source space (0 for none) and
+     * source condition values. */
+    TransAp *fromTrans;
+    CondSpace *fromCondSpace;
+    long fromVals;
+
+    /* Target space and the condition values to expand to. */
+    CondSpace *toCondSpace;
+    LongVect toValsList;
+
+    /* List links. */
+    Expansion *prev, *next;
+};
+
+typedef DList<Expansion> ExpansionList;
+
+/* A key range to be removed from a transition list. Has the lowKey, highKey
+ * and next members needed to serve as a list element in a PairIter. */
+struct Removal
+{
+ Key lowKey;
+ Key highKey;
+
+ Removal *next;
+};
+
+/* Global bookkeeping for condition spaces. */
+struct CondData
+{
+ CondData() : nextCondKey(0) {}
+
+ /* Condition info. nextCondKey starts at 0 (presumably the next free base
+ * key for a new condition space -- confirm against addCondSpace). */
+ Key nextCondKey;
+
+ /* All condition spaces, keyed by condition set. */
+ CondSpaceMap condSpaceMap;
+};
+
+extern CondData *condData;
+
+/* State class that implements actions and priorities. */
+struct StateAp
+{
+ StateAp();
+ StateAp(const StateAp &other);
+ ~StateAp();
+
+ /* Is the state final? */
+ bool isFinState() { return stateBits & SB_ISFINAL; }
+
+ /* Out transition list and the pointer for the default out trans. */
+ TransList outList;
+
+ /* In transition Lists. */
+ TransInList inList;
+
+ /* Entry points into the state. */
+ EntryIdSet entryIds;
+
+ /* Epsilon transitions. */
+ EpsilonTrans epsilonTrans;
+
+ /* Condition info. */
+ StateCondList stateCondList;
+
+ /* Number of in transitions from states other than ourselves. */
+ int foreignInTrans;
+
+ /* Temporary data for various algorithms. The members share storage, so
+ * only one of them is meaningful at any time. */
+ union {
+ /* When duplicating the fsm we need to map each
+ * state to the new state representing it. */
+ StateAp *stateMap;
+
+ /* When minimizing machines by partitioning, this maps to the group
+ * the state is in. */
+ MinPartition *partition;
+
+ /* When merging states (state machine operations) this next pointer is
+ * used for the list of states that need to be filled in. */
+ StateAp *next;
+
+ /* Identification for printing and stable minimization. */
+ int stateNum;
+
+ } alg;
+
+ /* Data used in epsilon operation, maybe fit into alg? */
+ StateAp *isolatedShadow;
+ int owningGraph;
+
+ /* A pointer to a dict element that contains the set of states this state
+ * represents. This cannot go into alg, because alg.next is used during
+ * the merging process. */
+ StateDictEl *stateDictEl;
+
+ /* When drawing epsilon transitions, holds the list of states to merge
+ * with. */
+ EptVect *eptVect;
+
+ /* Bits controlling the behaviour of the state during collapsing to dfa. */
+ int stateBits;
+
+ /* State list elements. */
+ StateAp *next, *prev;
+
+ /*
+ * Priority and Action data.
+ */
+
+ /* Out priorities transferred to out transitions. */
+ PriorTable outPriorTable;
+
+ /* The following two action tables are distinguished by the fact that
+ * toState actions are executed immediately after transition actions of
+ * incoming transitions and the current character will be the same as the
+ * one available then. The fromState actions are executed immediately
+ * before the transition actions of outgoing transitions and the current
+ * character is same as the one available then. */
+
+ /* Actions to execute upon entering into a state. */
+ ActionTable toStateActionTable;
+
+ /* Actions to execute when going from the state to the transition. */
+ ActionTable fromStateActionTable;
+
+ /* Actions to add to any future transitions that leave via this state. */
+ ActionTable outActionTable;
+
+ /* Conditions to add to any future transitions that leave via this state. */
+ ActionSet outCondSet;
+
+ /* Error action tables. */
+ ErrActionTable errActionTable;
+
+ /* Actions to execute on eof. */
+ ActionTable eofActionTable;
+
+ /* Set of longest match items that may be active in this state. */
+ LmItemSet lmItemSet;
+};
+
+/* Cursor over a singly-linked chain of ListItem. It caches the current
+ * item's key range and its successor, so the cached range can be edited
+ * (and trans swapped) without losing the place in the list. */
+template <class ListItem> struct NextTrans
+{
+	Key lowKey, highKey;
+	ListItem *trans;
+	ListItem *next;
+
+	/* Refresh the cache from trans. At the end of the list (trans == 0)
+	 * only next is cleared; lowKey/highKey keep their previous values. */
+	void load() {
+		if ( trans != 0 ) {
+			next = trans->next;
+			lowKey = trans->lowKey;
+			highKey = trans->highKey;
+		}
+		else {
+			next = 0;
+		}
+	}
+
+	/* Point the cursor at t and load its data. */
+	void set( ListItem *t ) {
+		trans = t;
+		load();
+	}
+
+	/* Step to the cached successor. */
+	void increment() {
+		set( next );
+	}
+};
+
+
+/* Encodes the different states that are meaningful to the user of the
+ * iterator. RangeInS1/RangeInS2: the reported range is only in list 1 or
+ * list 2. RangeOverlap: the range is present in both. BreakS1/BreakS2:
+ * notification that a range of list 1 or list 2 is about to be split. */
+enum PairIterUserState
+{
+ RangeInS1, RangeInS2,
+ RangeOverlap,
+ BreakS1, BreakS2
+};
+
+/* Iterates two transition lists concurrently, reporting for each step a key
+ * range that is only in list 1, only in list 2, or in both, and splitting
+ * partially overlapping ranges. The stepping logic is the coroutine in
+ * findNext().
+ * NOTE(review): the comparisons in findNext() assume both lists are ordered
+ * by key -- confirm against the callers. */
+template <class ListItem1, class ListItem2 = ListItem1> struct PairIter
+{
+ /* Encodes the different states that an fsm iterator can be in. Each value
+ * names a resume point inside findNext(). */
+ enum IterState {
+ Begin,
+ ConsumeS1Range, ConsumeS2Range,
+ OnlyInS1Range, OnlyInS2Range,
+ S1SticksOut, S1SticksOutBreak,
+ S2SticksOut, S2SticksOutBreak,
+ S1DragsBehind, S1DragsBehindBreak,
+ S2DragsBehind, S2DragsBehindBreak,
+ ExactOverlap, End
+ };
+
+ /* Stores the list heads and advances to the first item. */
+ PairIter( ListItem1 *list1, ListItem2 *list2 );
+
+ /* Query iterator. lte() is true while there is a current item, end() is
+ * true once the iteration is finished. Both increment forms advance. */
+ bool lte() { return itState != End; }
+ bool end() { return itState == End; }
+ void operator++(int) { findNext(); }
+ void operator++() { findNext(); }
+
+ /* Iterator state. */
+ ListItem1 *list1;
+ ListItem2 *list2;
+ IterState itState;
+ PairIterUserState userState;
+
+ /* Cursors into the two lists, plus the saved remainder of a range that is
+ * being split (restored into the cursor after the front part has been
+ * reported). */
+ NextTrans<ListItem1> s1Tel;
+ NextTrans<ListItem2> s2Tel;
+ Key bottomLow, bottomHigh;
+ ListItem1 *bottomTrans1;
+ ListItem2 *bottomTrans2;
+
+private:
+ void findNext();
+};
+
+/* Remember the two list heads, start in the Begin state and immediately run
+ * the coroutine once so the iterator is positioned on the first item (or at
+ * End when there is nothing to report). */
+template <class ListItem1, class ListItem2>
+PairIter<ListItem1, ListItem2>::PairIter( ListItem1 *list1, ListItem2 *list2 )
+	: list1(list1), list2(list2), itState(Begin)
+{
+	findNext();
+}
+
+/* Return and re-entry for the co-routine iterators. Records label in itState,
+ * returns to the caller, and defines the entry##label goto target at which
+ * the next call resumes. The trailing assignment to backIn only exists so
+ * that a statement follows the label. Expands to several statements, so this
+ * should ALWAYS be used inside of a block. */
+#define CO_RETURN(label) \
+ itState = label; \
+ return; \
+ entry##label: backIn = true
+
+/* Return and re-entry for the co-routine iterators. Same as CO_RETURN, but
+ * additionally stores uState in userState before returning so the caller can
+ * see what kind of item is being reported. Expands to several statements, so
+ * this should ALWAYS be used inside of a block. */
+#define CO_RETURN2(label, uState) \
+ itState = label; \
+ userState = uState; \
+ return; \
+ entry##label: backIn = true
+
+/* Advance to the next item. This is a co-routine: every CO_RETURN/CO_RETURN2
+ * records the resume point in itState and returns, and the next call jumps
+ * straight back to that point via the switch below. On return, userState
+ * says what is being reported and s1Tel/s2Tel hold the range(s), unless
+ * there are no more, in which case end() returns true. */
+template <class ListItem1, class ListItem2> void PairIter<ListItem1, ListItem2>::findNext()
+{
+ /* This variable is used in dummy statements that follow the entry
+ * goto labels. The compiler needs some statement to follow the label. */
+ bool backIn;
+
+ /* Jump into the iterator routine based on the iterator state. */
+ switch ( itState ) {
+ case Begin: goto entryBegin;
+ case ConsumeS1Range: goto entryConsumeS1Range;
+ case ConsumeS2Range: goto entryConsumeS2Range;
+ case OnlyInS1Range: goto entryOnlyInS1Range;
+ case OnlyInS2Range: goto entryOnlyInS2Range;
+ case S1SticksOut: goto entryS1SticksOut;
+ case S1SticksOutBreak: goto entryS1SticksOutBreak;
+ case S2SticksOut: goto entryS2SticksOut;
+ case S2SticksOutBreak: goto entryS2SticksOutBreak;
+ case S1DragsBehind: goto entryS1DragsBehind;
+ case S1DragsBehindBreak: goto entryS1DragsBehindBreak;
+ case S2DragsBehind: goto entryS2DragsBehind;
+ case S2DragsBehindBreak: goto entryS2DragsBehindBreak;
+ case ExactOverlap: goto entryExactOverlap;
+ case End: goto entryEnd;
+ }
+
+entryBegin:
+ /* Set up the next structs at the head of the transition lists. */
+ s1Tel.set( list1 );
+ s2Tel.set( list2 );
+
+ /* Concurrently scan both out ranges. */
+ while ( true ) {
+ if ( s1Tel.trans == 0 ) {
+ /* We are at the end of state1's ranges. Process the rest of
+ * state2's ranges. */
+ while ( s2Tel.trans != 0 ) {
+ /* Range is only in s2. */
+ CO_RETURN2( ConsumeS2Range, RangeInS2 );
+ s2Tel.increment();
+ }
+ break;
+ }
+ else if ( s2Tel.trans == 0 ) {
+ /* We are at the end of state2's ranges. Process the rest of
+ * state1's ranges. */
+ while ( s1Tel.trans != 0 ) {
+ /* Range is only in s1. */
+ CO_RETURN2( ConsumeS1Range, RangeInS1 );
+ s1Tel.increment();
+ }
+ break;
+ }
+ /* Both state1's and state2's transition elements are good.
+ * The signature of no overlap is a back key being in front of a
+ * front key. */
+ else if ( s1Tel.highKey < s2Tel.lowKey ) {
+ /* A range exists in state1 that does not overlap with state2. */
+ CO_RETURN2( OnlyInS1Range, RangeInS1 );
+ s1Tel.increment();
+ }
+ else if ( s2Tel.highKey < s1Tel.lowKey ) {
+ /* A range exists in state2 that does not overlap with state1. */
+ CO_RETURN2( OnlyInS2Range, RangeInS2 );
+ s2Tel.increment();
+ }
+ /* There is overlap, must mix the ranges in some way. */
+ else if ( s1Tel.lowKey < s2Tel.lowKey ) {
+ /* Range from state1 sticks out front. Must break it into
+ * non-overlapping and overlapping segments. */
+ bottomLow = s2Tel.lowKey;
+ bottomHigh = s1Tel.highKey;
+ s1Tel.highKey = s2Tel.lowKey;
+ s1Tel.highKey.decrement();
+ bottomTrans1 = s1Tel.trans;
+
+ /* Notify the caller that we are breaking s1. This gives them a
+ * chance to duplicate s1Tel[0,1].value. */
+ CO_RETURN2( S1SticksOutBreak, BreakS1 );
+
+ /* Broken off range is only in s1. */
+ CO_RETURN2( S1SticksOut, RangeInS1 );
+
+ /* Advance over the part sticking out front. */
+ s1Tel.lowKey = bottomLow;
+ s1Tel.highKey = bottomHigh;
+ s1Tel.trans = bottomTrans1;
+ }
+ else if ( s2Tel.lowKey < s1Tel.lowKey ) {
+ /* Range from state2 sticks out front. Must break it into
+ * non-overlapping and overlapping segments. */
+ bottomLow = s1Tel.lowKey;
+ bottomHigh = s2Tel.highKey;
+ s2Tel.highKey = s1Tel.lowKey;
+ s2Tel.highKey.decrement();
+ bottomTrans2 = s2Tel.trans;
+
+ /* Notify the caller that we are breaking s2. This gives them a
+ * chance to duplicate s2Tel[0,1].value. */
+ CO_RETURN2( S2SticksOutBreak, BreakS2 );
+
+ /* Broken off range is only in s2. */
+ CO_RETURN2( S2SticksOut, RangeInS2 );
+
+ /* Advance over the part sticking out front. */
+ s2Tel.lowKey = bottomLow;
+ s2Tel.highKey = bottomHigh;
+ s2Tel.trans = bottomTrans2;
+ }
+ /* Low ends are even. Are the high ends even? */
+ else if ( s1Tel.highKey < s2Tel.highKey ) {
+ /* Range from state2 goes longer than the range from state1. We
+ * must break the range from state2 into an evenly overlapping
+ * segment. */
+ bottomLow = s1Tel.highKey;
+ bottomLow.increment();
+ bottomHigh = s2Tel.highKey;
+ s2Tel.highKey = s1Tel.highKey;
+ bottomTrans2 = s2Tel.trans;
+
+ /* Notify the caller that we are breaking s2. This gives them a
+ * chance to duplicate s2Tel[0,1].value. */
+ CO_RETURN2( S2DragsBehindBreak, BreakS2 );
+
+ /* Breaking s2 produces exact overlap. */
+ CO_RETURN2( S2DragsBehind, RangeOverlap );
+
+ /* Advance over the front we just broke off of range 2. */
+ s2Tel.lowKey = bottomLow;
+ s2Tel.highKey = bottomHigh;
+ s2Tel.trans = bottomTrans2;
+
+ /* Advance over the entire s1Tel. We have consumed it. */
+ s1Tel.increment();
+ }
+ else if ( s2Tel.highKey < s1Tel.highKey ) {
+ /* Range from state1 goes longer than the range from state2. We
+ * must break the range from state1 into an evenly overlapping
+ * segment. */
+ bottomLow = s2Tel.highKey;
+ bottomLow.increment();
+ bottomHigh = s1Tel.highKey;
+ s1Tel.highKey = s2Tel.highKey;
+ bottomTrans1 = s1Tel.trans;
+
+ /* Notify the caller that we are breaking s1. This gives them a
+ * chance to duplicate s1Tel[0,1].value. */
+ CO_RETURN2( S1DragsBehindBreak, BreakS1 );
+
+ /* Breaking s1 produces exact overlap. */
+ CO_RETURN2( S1DragsBehind, RangeOverlap );
+
+ /* Advance over the front we just broke off of range 1. */
+ s1Tel.lowKey = bottomLow;
+ s1Tel.highKey = bottomHigh;
+ s1Tel.trans = bottomTrans1;
+
+ /* Advance over the entire s2Tel. We have consumed it. */
+ s2Tel.increment();
+ }
+ else {
+ /* There is an exact overlap. */
+ CO_RETURN2( ExactOverlap, RangeOverlap );
+
+ s1Tel.increment();
+ s2Tel.increment();
+ }
+ }
+
+ /* Done, go into end state. Any further call re-enters at entryEnd and
+ * falls straight out of the function, so the iterator stays at End. */
+ CO_RETURN( End );
+}
+
+
+/* Compare lists of epsilon transitions. Entries are name ids of targets. */
+typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans;
+
+/* Compare class for the Approximate minimization. compare() is only declared
+ * here; presumably it follows the usual negative/zero/positive ordering
+ * convention -- confirm against the definition. */
+class ApproxCompare
+{
+public:
+ ApproxCompare() { }
+ int compare( const StateAp *pState1, const StateAp *pState2 );
+};
+
+/* Compare class for the initial partitioning of a partition minimization.
+ * compare() is only declared here; presumably it follows the usual
+ * negative/zero/positive ordering convention -- confirm against the
+ * definition. */
+class InitPartitionCompare
+{
+public:
+ InitPartitionCompare() { }
+ int compare( const StateAp *pState1, const StateAp *pState2 );
+};
+
+/* Compare class for the regular partitioning of a partition minimization.
+ * Used both as the ordering for a merge sort of state pointers and, on the
+ * sorted result, to detect where neighbouring states differ (a negative
+ * return value from compare()). */
+class PartitionCompare
+{
+public:
+ PartitionCompare() { }
+ int compare( const StateAp *pState1, const StateAp *pState2 );
+};
+
+/* Compare class for a minimization that marks pairs. Provides the shouldMark
+ * routine, which takes the current mark index and a pair of states and
+ * answers whether that pair should be marked. */
+class MarkCompare
+{
+public:
+ MarkCompare() { }
+ bool shouldMark( MarkIndex &markIndex, const StateAp *pState1,
+ const StateAp *pState2 );
+};
+
+/* List of partitions. */
+typedef DList< MinPartition > PartitionList;
+
+/* List of transitions out of a state. */
+typedef Vector<TransEl> TransListVect;
+
+/* Entry point map used for keeping track of entry points in a machine.
+ * Entry ids are ints; EntryMap maps an id to the state it enters. */
+typedef BstSet< int > EntryIdSet;
+typedef BstMapEl< int, StateAp* > EntryMapEl;
+typedef BstMap< int, StateAp* > EntryMap;
+typedef Vector<EntryMapEl> EntryMapBase;
+
+/* Graph class that implements actions and priorities. Everything below is
+ * declaration only; the member functions are defined in other files. */
+struct FsmAp
+{
+ /* Constructors/Destructors. */
+ FsmAp( );
+ FsmAp( const FsmAp &graph );
+ ~FsmAp();
+
+ /* The list of states. */
+ StateList stateList;
+ StateList misfitList;
+
+ /* The map of entry points. */
+ EntryMap entryPoints;
+
+ /* The start state. */
+ StateAp *startState;
+
+ /* The set of final states. */
+ StateSet finStateSet;
+
+ /* Misfit Accounting. Are misfits put on a separate list? */
+ bool misfitAccounting;
+
+ /*
+ * Transition actions and priorities.
+ */
+
+ /* Set priorities on transitions. */
+ void startFsmPrior( int ordering, PriorDesc *prior );
+ void allTransPrior( int ordering, PriorDesc *prior );
+ void finishFsmPrior( int ordering, PriorDesc *prior );
+ void leaveFsmPrior( int ordering, PriorDesc *prior );
+
+ /* Action setting support. */
+ void transferErrorActions( StateAp *state, int transferPoint );
+ void setErrorAction( StateAp *state, int ordering, Action *action );
+
+ /* Fill all spaces in a transition list with an error transition. */
+ void fillGaps( StateAp *state );
+
+ /* Similar to setErrorAction, instead gives a state to go to on error. */
+ void setErrorTarget( StateAp *state, StateAp *target, int *orderings,
+ Action **actions, int nActs );
+
+ /* Set actions to execute. */
+ void startFsmAction( int ordering, Action *action );
+ void allTransAction( int ordering, Action *action );
+ void finishFsmAction( int ordering, Action *action );
+ void leaveFsmAction( int ordering, Action *action );
+ void longMatchAction( int ordering, LongestMatchPart *lmPart );
+
+ /* Set conditions. */
+ CondSpace *addCondSpace( const CondSet &condSet );
+
+ void findEmbedExpansions( ExpansionList &expansionList,
+ StateAp *destState, Action *condAction );
+ void embedCondition( MergeData &md, StateAp *state, Action *condAction );
+ void embedCondition( StateAp *state, Action *condAction );
+
+ void startFsmCondition( Action *condAction );
+ void allTransCondition( Action *condAction );
+ void leaveFsmCondition( Action *condAction );
+
+ /* Set error actions to execute. */
+ void startErrorAction( int ordering, Action *action, int transferPoint );
+ void allErrorAction( int ordering, Action *action, int transferPoint );
+ void finalErrorAction( int ordering, Action *action, int transferPoint );
+ void notStartErrorAction( int ordering, Action *action, int transferPoint );
+ void notFinalErrorAction( int ordering, Action *action, int transferPoint );
+ void middleErrorAction( int ordering, Action *action, int transferPoint );
+
+ /* Set EOF actions. */
+ void startEOFAction( int ordering, Action *action );
+ void allEOFAction( int ordering, Action *action );
+ void finalEOFAction( int ordering, Action *action );
+ void notStartEOFAction( int ordering, Action *action );
+ void notFinalEOFAction( int ordering, Action *action );
+ void middleEOFAction( int ordering, Action *action );
+
+ /* Set To State actions. */
+ void startToStateAction( int ordering, Action *action );
+ void allToStateAction( int ordering, Action *action );
+ void finalToStateAction( int ordering, Action *action );
+ void notStartToStateAction( int ordering, Action *action );
+ void notFinalToStateAction( int ordering, Action *action );
+ void middleToStateAction( int ordering, Action *action );
+
+ /* Set From State actions. */
+ void startFromStateAction( int ordering, Action *action );
+ void allFromStateAction( int ordering, Action *action );
+ void finalFromStateAction( int ordering, Action *action );
+ void notStartFromStateAction( int ordering, Action *action );
+ void notFinalFromStateAction( int ordering, Action *action );
+ void middleFromStateAction( int ordering, Action *action );
+
+ /* Shift the action ordering of the start transitions to start at
+ * fromOrder and increase in units of 1. Useful before kleene star
+ * operation. */
+ int shiftStartActionOrder( int fromOrder );
+
+ /* Clear all priorities from the fsm so they won't affect minimization
+ * of the final fsm. */
+ void clearAllPriorities();
+
+ /* Zero out all the function keys. */
+ void nullActionKeys();
+
+ /* Walk the list of states and verify state properties. */
+ void verifyStates();
+
+ /* Misfit Accounting. Are misfits put on a separate list? */
+ void setMisfitAccounting( bool val )
+ { misfitAccounting = val; }
+
+ /* Set and Unset a state as final. */
+ void setFinState( StateAp *state );
+ void unsetFinState( StateAp *state );
+
+ void setStartState( StateAp *state );
+ void unsetStartState( );
+
+ /* Set and unset a state as an entry point. */
+ void setEntry( int id, StateAp *state );
+ void changeEntry( int id, StateAp *to, StateAp *from );
+ void unsetEntry( int id, StateAp *state );
+ void unsetEntry( int id );
+ void unsetAllEntryPoints();
+
+ /* Epsilon transitions. */
+ void epsilonTrans( int id );
+ void shadowReadWriteStates( MergeData &md );
+
+ /*
+ * Basic attaching and detaching.
+ */
+
+ /* Common to attaching/detaching list and default. */
+ void attachToInList( StateAp *from, StateAp *to, TransAp *&head, TransAp *trans );
+ void detachFromInList( StateAp *from, StateAp *to, TransAp *&head, TransAp *trans );
+
+ /* Attach with a new transition. */
+ TransAp *attachNewTrans( StateAp *from, StateAp *to,
+ Key onChar1, Key onChar2 );
+
+ /* Attach with an existing transition that is already in an out list. */
+ void attachTrans( StateAp *from, StateAp *to, TransAp *trans );
+
+ /* Redirect a transition away from error and towards some state. */
+ void redirectErrorTrans( StateAp *from, StateAp *to, TransAp *trans );
+
+ /* Detach a transition from a target state. */
+ void detachTrans( StateAp *from, StateAp *to, TransAp *trans );
+
+ /* Detach a state from the graph. */
+ void detachState( StateAp *state );
+
+ /*
+ * NFA to DFA conversion routines.
+ */
+
+ /* Duplicate a transition that will drop in to a free spot. */
+ TransAp *dupTrans( StateAp *from, TransAp *srcTrans );
+
+ /* In crossing, two transitions both go to real states. */
+ TransAp *fsmAttachStates( MergeData &md, StateAp *from,
+ TransAp *destTrans, TransAp *srcTrans );
+
+ /* Two transitions are to be crossed, handle the possibility of either
+ * going to the error state. */
+ TransAp *mergeTrans( MergeData &md, StateAp *from,
+ TransAp *destTrans, TransAp *srcTrans );
+
+ /* Compare to determine the relative priorities of two transition tables. */
+ int comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 );
+
+ /* Cross a src transition with one that is already occupying a spot. */
+ TransAp *crossTransitions( MergeData &md, StateAp *from,
+ TransAp *destTrans, TransAp *srcTrans );
+
+ void outTransCopy( MergeData &md, StateAp *dest, TransAp *srcList );
+
+ void doRemove( MergeData &md, StateAp *destState, ExpansionList &expList1 );
+ void doExpand( MergeData &md, StateAp *destState, ExpansionList &expList1 );
+ void findCondExpInTrans( ExpansionList &expansionList, StateAp *state,
+ Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace,
+ long destVals, LongVect &toValsList );
+ void findTransExpansions( ExpansionList &expansionList,
+ StateAp *destState, StateAp *srcState );
+ void findCondExpansions( ExpansionList &expansionList,
+ StateAp *destState, StateAp *srcState );
+ void mergeStateConds( StateAp *destState, StateAp *srcState );
+
+ /* Merge a set of states into newState. */
+ void mergeStates( MergeData &md, StateAp *destState,
+ StateAp **srcStates, int numSrc );
+ void mergeStatesLeaving( MergeData &md, StateAp *destState, StateAp *srcState );
+ void mergeStates( MergeData &md, StateAp *destState, StateAp *srcState );
+
+ /* Make all states that are combinations of other states and that
+ * have not yet had their out transitions filled in. This will
+ * empty out stateDict and stFil. */
+ void fillInStates( MergeData &md );
+
+ /*
+ * Transition Comparison.
+ */
+
+ /* Compare transition data. Either of the pointers may be null. */
+ static inline int compareDataPtr( TransAp *trans1, TransAp *trans2 );
+
+ /* Compare target state and transition data. Either pointer may be null. */
+ static inline int compareFullPtr( TransAp *trans1, TransAp *trans2 );
+
+ /* Compare target partitions. Either pointer may be null. */
+ static inline int comparePartPtr( TransAp *trans1, TransAp *trans2 );
+
+ /* Check marked status of target states. Either pointer may be null. */
+ static inline bool shouldMarkPtr( MarkIndex &markIndex,
+ TransAp *trans1, TransAp *trans2 );
+
+ /*
+ * Callbacks.
+ */
+
+ /* Compare priority and function table of transitions. */
+ static int compareTransData( TransAp *trans1, TransAp *trans2 );
+
+ /* Add in the properties of srcTrans into this. */
+ void addInTrans( TransAp *destTrans, TransAp *srcTrans );
+
+ /* Compare states on data stored in the states. */
+ static int compareStateData( const StateAp *state1, const StateAp *state2 );
+
+ /* Out transition data. */
+ void clearOutData( StateAp *state );
+ bool hasOutData( StateAp *state );
+ void transferOutData( StateAp *destState, StateAp *srcState );
+
+ /*
+ * Allocation.
+ */
+
+ /* New up a state and add it to the graph. */
+ StateAp *addState();
+
+ /*
+ * Building basic machines
+ */
+
+ void concatFsm( Key c );
+ void concatFsm( Key *str, int len );
+ void concatFsmCI( Key *str, int len );
+ void orFsm( Key *set, int len );
+ void rangeFsm( Key low, Key high );
+ void rangeStarFsm( Key low, Key high );
+ void emptyFsm( );
+ void lambdaFsm( );
+
+ /*
+ * Fsm operators.
+ */
+
+ void starOp( );
+ void repeatOp( int times );
+ void optionalRepeatOp( int times );
+ void concatOp( FsmAp *other );
+ void unionOp( FsmAp *other );
+ void intersectOp( FsmAp *other );
+ void subtractOp( FsmAp *other );
+ void epsilonOp();
+ void joinOp( int startId, int finalId, FsmAp **others, int numOthers );
+ void globOp( FsmAp **others, int numOthers );
+ void deterministicEntry();
+
+ /*
+ * Operator workers
+ */
+
+ /* Determine if there are any entry points into a start state other than
+ * the start state. */
+ bool isStartStateIsolated();
+
+ /* Make a new start state that has no entry points. Will not change the
+ * identity of the fsm. */
+ void isolateStartState();
+
+ /* Workers for resolving epsilon transitions. */
+ bool inEptVect( EptVect *eptVect, StateAp *targ );
+ void epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving );
+ void resolveEpsilonTrans( MergeData &md );
+
+ /* Workers for concatenation and union. */
+ void doConcat( FsmAp *other, StateSet *fromStates, bool optional );
+ void doOr( FsmAp *other );
+
+ /*
+ * Final states
+ */
+
+ /* Unset any final states that are no longer to be final
+ * due to final bits. */
+ void unsetIncompleteFinals();
+ void unsetKilledFinals();
+
+ /* Bring in other's entry points. Assumes others states are going to be
+ * copied into this machine. */
+ void copyInEntryPoints( FsmAp *other );
+
+ /* Set State numbers starting at 0. */
+ void setStateNumbers();
+
+ /* Unset all final states. */
+ void unsetAllFinStates();
+
+ /* Set the bits of final states and clear the bits of non final states. */
+ void setFinBits( int finStateBits );
+
+ /*
+ * Self-consistency checks.
+ */
+
+ /* Run a sanity check on the machine. */
+ void verifyIntegrity();
+
+ /* Verify that there are no unreachable states, or dead end states. */
+ void verifyReachability();
+ void verifyNoDeadEndStates();
+
+ /*
+ * Path pruning
+ */
+
+ /* Mark all states reachable from state. NOTE(review): going by the name
+ * this variant presumably walks transitions backwards -- confirm. */
+ void markReachableFromHereReverse( StateAp *state );
+
+ /* Mark all states reachable from state. */
+ void markReachableFromHere( StateAp *state );
+ void markReachableFromHereStopFinal( StateAp *state );
+
+ /* Removes states that cannot be reached by any path in the fsm and are
+ * thus wasted silicon. NOTE(review): this text is identical to the
+ * comment on removeUnreachableStates below; going by the name this one
+ * presumably removes states with no path onward to a final state --
+ * confirm against the definition. */
+ void removeDeadEndStates();
+
+ /* Removes states that cannot be reached by any path in the fsm and are
+ * thus wasted silicon. */
+ void removeUnreachableStates();
+
+ /* Remove error actions from states on which the error transition will
+ * never be taken. NOTE(review): the two declarations below are boolean
+ * queries, so this comment looks stale -- confirm what they report. */
+ bool outListCovers( StateAp *state );
+ bool anyErrorRange( StateAp *state );
+
+ /* Remove states that are on the misfit list. */
+ void removeMisfits();
+
+ /*
+ * FSM Minimization
+ */
+
+ /* Minimization by partitioning. */
+ void minimizePartition1();
+ void minimizePartition2();
+
+ /* Minimize the final state Machine. The result is the minimal fsm. Slow
+ * but stable, correct minimization. Uses n^2 space (lookout) and average
+ * n^2 time. Worst case n^3 time, but that is a very rare case. */
+ void minimizeStable();
+
+ /* Minimize the final state machine. Does not find the minimal fsm, but a
+ * pretty good approximation. Does not use any extra space. Average n^2
+ * time. Worst case n^3 time, but that is a very rare case. */
+ void minimizeApproximate();
+
+ /* This is the worker for the minimize approximate solution. It merges
+ * states that have identical out transitions. */
+ bool minimizeRound( );
+
+ /* Given an initial partitioning of states, split partitions that have out
+ * trans to differing partitions. */
+ int partitionRound( StateAp **statePtrs, MinPartition *parts, int numParts );
+
+ /* Split partitions that have a transition to a previously split partition, until
+ * there are no more partitions to split. */
+ int splitCandidates( StateAp **statePtrs, MinPartition *parts, int numParts );
+
+ /* Fuse together states in the same partition. */
+ void fusePartitions( MinPartition *parts, int numParts );
+
+ /* Mark pairs where out final stateness differs, out trans data differs,
+ * trans pairs go to a marked pair or trans data differs. Should get
+ * a lot of pairs. */
+ void initialMarkRound( MarkIndex &markIndex );
+
+ /* One marking round on all state pairs. Considers if trans pairs go
+ * to a marked state only. Returns whether or not a pair was marked. */
+ bool markRound( MarkIndex &markIndex );
+
+ /* Move the in trans into src into dest. */
+ void inTransMove(StateAp *dest, StateAp *src);
+
+ /* Make state src and dest the same state. */
+ void fuseEquivStates(StateAp *dest, StateAp *src);
+
+ /* Find any states that didn't get marked by the marking algorithm and
+ * merge them into the primary states of their equivalence class. */
+ void fuseUnmarkedPairs( MarkIndex &markIndex );
+
+ /* Merge neighboring transitions that go to the same state and have the
+ * same transition data. */
+ void compressTransitions();
+};
+
+
+#endif /* _FSMGRAPH_H */
diff --git a/ragel/fsmmin.cpp b/ragel/fsmmin.cpp
new file mode 100644
index 0000000..c57de6f
--- /dev/null
+++ b/ragel/fsmmin.cpp
@@ -0,0 +1,732 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "fsmgraph.h"
+#include "mergesort.h"
+
+/* Run one refinement pass over the partitions in parts[0..numParts). The
+ * states of each partition are sorted with PartitionCompare; wherever a state
+ * compares as different from its predecessor a new partition is opened at the
+ * end of parts and the following states are moved into it. statePtrs is
+ * scratch space for the sort and must have room for every state of the
+ * largest partition. Returns the number of partitions in use afterwards; the
+ * same value as the numParts passed in means nothing was split. */
+int FsmAp::partitionRound( StateAp **statePtrs, MinPartition *parts, int numParts )
+{
+ /* Need a mergesort object and a single partition compare. */
+ MergeSort<StateAp*, PartitionCompare> mergeSort;
+ PartitionCompare partCompare;
+
+ /* For each partition. numParts grows whenever a split happens, so the
+ * partitions created during this pass are visited by it as well. */
+ for ( int p = 0; p < numParts; p++ ) {
+ /* Fill the pointer array with the states in the partition. */
+ StateList::Iter state = parts[p].list;
+ for ( int s = 0; state.lte(); state++, s++ )
+ statePtrs[s] = state;
+
+ /* Sort the states using the partitioning compare. */
+ int numStates = parts[p].list.length();
+ mergeSort.sort( statePtrs, numStates );
+
+ /* Assign the states into partitions based on the results of the sort.
+ * States stay in partition p until the first difference is seen.
+ * firstNewPart remembers where the partitions created by this split
+ * begin. */
+ int destPart = p, firstNewPart = numParts;
+ for ( int s = 1; s < numStates; s++ ) {
+ /* If this state differs from the last then move to the next partition. */
+ if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) {
+ /* The new partition is the next avail spot. */
+ destPart = numParts;
+ numParts += 1;
+ }
+
+ /* If the state is not staying in the first partition, then
+ * transfer it to its destination partition. */
+ if ( destPart != p ) {
+ StateAp *state = parts[p].list.detach( statePtrs[s] );
+ parts[destPart].list.append( state );
+ }
+ }
+
+ /* Fix the partition pointer for all the states that got moved to a new
+ * partition. This must be done after the states are transferred so the
+ * result of the sort is not altered. */
+ for ( int newPart = firstNewPart; newPart < numParts; newPart++ ) {
+ StateList::Iter state = parts[newPart].list;
+ for ( ; state.lte(); state++ )
+ state->alg.partition = &parts[newPart];
+ }
+ }
+
+ return numParts;
+}
+
+/**
+ * \brief Minimize by partitioning, version 1.
+ *
+ * Builds an initial partitioning of the states with InitPartitionCompare,
+ * then runs partitionRound() over all partitions until a round creates no
+ * new partition, and finally fuses the states of each partition. Produces
+ * the most minimal FSM possible.
+ */
+void FsmAp::minimizePartition1()
+{
+    /* One mergesort object and one compare for the initial partitioning. */
+    MergeSort<StateAp*, InitPartitionCompare> mergeSort;
+    InitPartitionCompare initPartCompare;
+
+    /* An empty machine has nothing to minimize. */
+    int numStates = stateList.length();
+    if ( numStates == 0 )
+        return;
+
+    /* Collect pointers to all states so that they can be sorted. */
+    StateAp **statePtrs = new StateAp*[numStates];
+    int fill = 0;
+    for ( StateList::Iter st = stateList; st.lte(); st++ )
+        statePtrs[fill++] = st;
+
+    /* Order the states by final state status and transition functions. */
+    mergeSort.sort( statePtrs, numStates );
+
+    /* One list of states per partition. There can never be more partitions
+     * than states. */
+    MinPartition *parts = new MinPartition[numStates];
+
+    /* Walk the sorted states. The first state opens partition zero, after
+     * that a new partition is opened whenever a state differs from its
+     * predecessor. */
+    int lastPart = 0;
+    for ( int s = 0; s < numStates; s++ ) {
+        if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 )
+            lastPart += 1;
+
+        MinPartition *dest = &parts[lastPart];
+        statePtrs[s]->alg.partition = dest;
+        dest->list.append( statePtrs[s] );
+    }
+
+    /* The states were appended to the partition lists without being taken
+     * off the main list first, so just drop the main list's links. */
+    stateList.abandon();
+
+    /* Keep running split rounds until a round leaves the partition count
+     * unchanged. */
+    int numParts = lastPart + 1;
+    int prevParts;
+    do {
+        prevParts = numParts;
+        numParts = partitionRound( statePtrs, parts, prevParts );
+    } while ( numParts != prevParts );
+
+    /* Fuse the states of each partition. This puts the survivors back onto
+     * the main list. */
+    fusePartitions( parts, numParts );
+
+    /* Cleanup. */
+    delete[] statePtrs;
+    delete[] parts;
+}
+
+/* Split partitions that need splitting, decide which partitions might need
+ * to be split as a result, continue until there are no more that might need
+ * to be split. statePtrs is scratch space for sorting the states of one
+ * partition, parts is the partition array (new partitions are taken from the
+ * slots at index numParts and up) and numParts is the number of partitions
+ * in use on entry. Returns the number of partitions in use on exit. */
+int FsmAp::splitCandidates( StateAp **statePtrs, MinPartition *parts, int numParts )
+{
+ /* Need a mergesort and a partition compare. */
+ MergeSort<StateAp*, PartitionCompare> mergeSort;
+ PartitionCompare partCompare;
+
+ /* The lists of unsplittable (partList) and splittable partitions.
+ * Only partitions in the splittable list are checked for needing splitting. */
+ PartitionList partList, splittable;
+
+ /* Initially, all partitions are born from a split (the initial
+ * partitioning) and can cause other partitions to be split. So any
+ * partition with a state with a transition out to another partition is a
+ * candidate for splitting. This will make every partition except possibly
+ * partitions of final states split candidates. */
+ for ( int p = 0; p < numParts; p++ ) {
+ /* Assume not active. */
+ parts[p].active = false;
+
+ /* Look for a trans out of any state in the partition. */
+ for ( StateList::Iter state = parts[p].list; state.lte(); state++ ) {
+ /* If there is at least one transition out of the state then
+ * the partition becomes splittable. */
+ if ( state->outList.length() > 0 ) {
+ parts[p].active = true;
+ break;
+ }
+ }
+
+ /* If it was found active then it goes on the splittable list. */
+ if ( parts[p].active )
+ splittable.append( &parts[p] );
+ else
+ partList.append( &parts[p] );
+ }
+
+ /* While there are partitions that are splittable, pull one off and try
+ * to split it. If it splits, determine which partitions may now be split
+ * as a result of the newly split partition. */
+ while ( splittable.length() > 0 ) {
+ MinPartition *partition = splittable.detachFirst();
+
+ /* Fill the pointer array with the states in the partition. */
+ StateList::Iter state = partition->list;
+ for ( int s = 0; state.lte(); state++, s++ )
+ statePtrs[s] = state;
+
+ /* Sort the states using the partitioning compare. */
+ int numStates = partition->list.length();
+ mergeSort.sort( statePtrs, numStates );
+
+ /* Assign the states into partitions based on the results of the sort.
+ * destPart stays equal to partition until the first difference is
+ * seen. firstNewPart remembers where the partitions created by this
+ * split begin. */
+ MinPartition *destPart = partition;
+ int firstNewPart = numParts;
+ for ( int s = 1; s < numStates; s++ ) {
+ /* If this state differs from the last then move to the next partition. */
+ if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) {
+ /* The new partition is the next avail spot. */
+ destPart = &parts[numParts];
+ numParts += 1;
+ }
+
+ /* If the state is not staying in the first partition, then
+ * transfer it to its destination partition. */
+ if ( destPart != partition ) {
+ StateAp *state = partition->list.detach( statePtrs[s] );
+ destPart->list.append( state );
+ }
+ }
+
+ /* Fix the partition pointer for all the states that got moved to a new
+ * partition. This must be done after the states are transferred so the
+ * result of the sort is not altered. */
+ int newPart;
+ for ( newPart = firstNewPart; newPart < numParts; newPart++ ) {
+ StateList::Iter state = parts[newPart].list;
+ for ( ; state.lte(); state++ )
+ state->alg.partition = &parts[newPart];
+ }
+
+ /* Put the partition we just split and any new partitions that came out
+ * of the split onto the inactive list. */
+ partition->active = false;
+ partList.append( partition );
+ for ( newPart = firstNewPart; newPart < numParts; newPart++ ) {
+ parts[newPart].active = false;
+ partList.append( &parts[newPart] );
+ }
+
+ /* destPart only moves away from partition when a split happened. If
+ * nothing was split then no other partition can be affected. */
+ if ( destPart == partition )
+ continue;
+
+ /* Now determine which partitions are splittable as a result of
+ * splitting partition by walking the in lists of the states in
+ * partitions that got split. Partition is the faked first item in the
+ * loop: newPart starts one before firstNewPart so that the first pass
+ * handles partition itself. */
+ MinPartition *causalPart = partition;
+ newPart = firstNewPart - 1;
+ while ( newPart < numParts ) {
+ /* Loop all states in the causal partition. */
+ StateList::Iter state = causalPart->list;
+ for ( ; state.lte(); state++ ) {
+ /* Walk all transitions into the state and put the partition
+ * that the from state is in onto the splittable list. */
+ for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) {
+ MinPartition *fromPart = trans->fromState->alg.partition;
+ if ( ! fromPart->active ) {
+ fromPart->active = true;
+ partList.detach( fromPart );
+ splittable.append( fromPart );
+ }
+ }
+ }
+
+ /* Advance to the next new partition. On the final pass this forms
+ * the address of parts[numParts], which is not used because the
+ * loop condition fails first. */
+ newPart += 1;
+ causalPart = &parts[newPart];
+ }
+ }
+ return numParts;
+}
+
+
+/**
+ * \brief Minimize by partitioning, version 2 (best alg).
+ *
+ * Builds an initial partitioning of the states with InitPartitionCompare and
+ * hands it to splitCandidates(), which only re-examines partitions that may
+ * have become splittable. The states of each resulting partition are then
+ * fused. Runs faster than version 1. Produces the most minimal fsm possible.
+ */
+void FsmAp::minimizePartition2()
+{
+    /* One mergesort object and one compare for the initial partitioning. */
+    MergeSort<StateAp*, InitPartitionCompare> mergeSort;
+    InitPartitionCompare initPartCompare;
+
+    /* An empty machine has nothing to minimize. */
+    int numStates = stateList.length();
+    if ( numStates == 0 )
+        return;
+
+    /* Collect pointers to all states so that they can be sorted. */
+    StateAp **statePtrs = new StateAp*[numStates];
+    int fill = 0;
+    for ( StateList::Iter st = stateList; st.lte(); st++ )
+        statePtrs[fill++] = st;
+
+    /* Order the states by final state status and transition functions. */
+    mergeSort.sort( statePtrs, numStates );
+
+    /* One list of states per partition. There can never be more partitions
+     * than states. */
+    MinPartition *parts = new MinPartition[numStates];
+
+    /* Walk the sorted states. The first state opens partition zero, after
+     * that a new partition is opened whenever a state differs from its
+     * predecessor. */
+    int lastPart = 0;
+    for ( int s = 0; s < numStates; s++ ) {
+        if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 )
+            lastPart += 1;
+
+        MinPartition *dest = &parts[lastPart];
+        statePtrs[s]->alg.partition = dest;
+        dest->list.append( statePtrs[s] );
+    }
+
+    /* The states were appended to the partition lists without being taken
+     * off the main list first, so just drop the main list's links. */
+    stateList.abandon();
+
+    /* Split until no partition is a candidate for splitting any more. */
+    int numParts = splitCandidates( statePtrs, parts, lastPart + 1 );
+
+    /* Fuse the states of each partition. This puts the survivors back onto
+     * the main list. */
+    fusePartitions( parts, numParts );
+
+    /* Cleanup. */
+    delete[] statePtrs;
+    delete[] parts;
+}
+
+/* First marking pass: mark every unordered pair of distinct states that
+ * InitPartitionCompare reports as different. */
+void FsmAp::initialMarkRound( MarkIndex &markIndex )
+{
+    /* Need an initial partition compare. */
+    InitPartitionCompare initPartCompare;
+
+    /* For each state p visit the states q that come before it on the state
+     * list. This sees every unordered pair (p, q) with p != q exactly once. */
+    for ( StateAp *p = stateList.head; p != 0; p = p->next ) {
+        for ( StateAp *q = stateList.head; q != p; q = q->next ) {
+            /* States that differ on the initial compare are separated
+             * right away. */
+            if ( initPartCompare.compare( p, q ) != 0 )
+                markIndex.markPair( p->alg.stateNum, q->alg.stateNum );
+        }
+    }
+}
+
+/* One marking pass over all unordered pairs of distinct states. A pair that
+ * is not yet marked gets marked when MarkCompare::shouldMark() says so.
+ * Returns true if at least one pair was newly marked. */
+bool FsmAp::markRound( MarkIndex &markIndex )
+{
+    /* Need a mark comparison. */
+    MarkCompare markCompare;
+    bool pairWasMarked = false;
+
+    /* For each state p visit the states q that come before it on the state
+     * list. This sees every unordered pair (p, q) with p != q exactly once. */
+    for ( StateAp *p = stateList.head; p != 0; p = p->next ) {
+        for ( StateAp *q = stateList.head; q != p; q = q->next ) {
+            /* Pairs that are already marked need no further work. */
+            if ( markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) )
+                continue;
+
+            if ( markCompare.shouldMark( markIndex, p, q ) ) {
+                markIndex.markPair( p->alg.stateNum, q->alg.stateNum );
+                pairWasMarked = true;
+            }
+        }
+    }
+
+    return pairWasMarked;
+}
+
+
+/**
+ * \brief Minimize by pair marking.
+ *
+ * Decides for each pair of states whether the two are distinct. Uses O(n^2)
+ * memory and should only be used on small graphs. Produces the most minimal
+ * FSM possible.
+ */
+void FsmAp::minimizeStable()
+{
+    /* The mark index is addressed by state number. */
+    setStateNumbers();
+
+    /* Records which pairs of states have been found to be distinct. */
+    MarkIndex markIndex( stateList.length() );
+
+    /* Mark pairs where final stateness, out trans, or trans data differ. */
+    initialMarkRound( markIndex );
+
+    /* Keep running marking rounds until a round marks nothing new. */
+    while ( markRound( markIndex ) ) {
+    }
+
+    /* Pairs that are still unmarked are equivalent. Merge them. */
+    fuseUnmarkedPairs( markIndex );
+}
+
+/* One round of approximate minimization. Sorts all states with ApproxCompare
+ * and fuses every state that compares equal to the closest preceding
+ * surviving state in the sorted order. Returns true if at least one pair of
+ * states was fused. */
+bool FsmAp::minimizeRound()
+{
+    /* Take the state count once. fuseEquivStates() removes the fused state
+     * from stateList, so re-reading stateList.length() while walking the
+     * array would end the walk before the tail of the sorted array has been
+     * examined. */
+    const int numStates = stateList.length();
+
+    /* Nothing to do if there are no states. */
+    if ( numStates == 0 )
+        return false;
+
+    /* Need a mergesort on approx compare and an approx compare. */
+    MergeSort<StateAp*, ApproxCompare> mergeSort;
+    ApproxCompare approxCompare;
+
+    /* Fill up an array of pointers to the states. */
+    StateAp **statePtrs = new StateAp*[numStates];
+    int fill = 0;
+    for ( StateList::Iter state = stateList; state.lte(); state++ )
+        statePtrs[fill++] = state;
+
+    /* Sort the array so that equal states end up next to each other. */
+    mergeSort.sort( statePtrs, numStates );
+
+    /* Walk the whole array looking for duplicates next to each other and
+     * merge in any duplicates. */
+    bool modified = false;
+    StateAp **pLast = statePtrs;
+    for ( int i = 1; i < numStates; i++ ) {
+        StateAp **pState = statePtrs + i;
+        if ( approxCompare.compare( *pLast, *pState ) == 0 ) {
+            /* Equal, so fuse *pState into *pLast. The fuse deletes *pState
+             * and that slot is not looked at again. pLast stays where it is
+             * so that further equal states are fused into the same
+             * survivor. */
+            fuseEquivStates( *pLast, *pState );
+            modified = true;
+        }
+        else {
+            /* Different. The current state becomes the survivor that the
+             * following states are compared against. */
+            pLast = pState;
+        }
+    }
+
+    delete[] statePtrs;
+    return modified;
+}
+
+/**
+ * \brief Minimize by an approximation.
+ *
+ * Repeatedly tries to find states with transitions out to the same set of
+ * states on the same set of keys until no more identical states can be found.
+ * Does not produce the most minimal FSM possible.
+ */
+void FsmAp::minimizeApproximate()
+{
+    /* Run rounds for as long as the previous round managed to fuse
+     * something. */
+    while ( minimizeRound() ) {
+    }
+}
+
+
+/* Remove states that cannot be reached from the start state or from any
+ * entry point. Marks everything reachable, then deletes every state that did
+ * not get marked and clears the mark on the ones that stay. */
+void FsmAp::removeUnreachableStates()
+{
+    /* Misfit accounting should be off and there should be no states on the
+     * misfit list. */
+    assert( !misfitAccounting && misfitList.length() == 0 );
+
+    /* Mark from the start state and from every entry point. */
+    markReachableFromHere( startState );
+    for ( EntryMap::Iter en = entryPoints; en.lte(); en++ )
+        markReachableFromHere( en->value );
+
+    /* Sweep the state list. The successor is saved up front because the
+     * current state may be deleted. */
+    for ( StateAp *state = stateList.head, *next = 0; state != 0; state = next ) {
+        next = state->next;
+
+        if ( ( state->stateBits & SB_ISMARKED ) == 0 ) {
+            /* Never reached. Take it out of the graph and free it. */
+            detachState( state );
+            stateList.detach( state );
+            delete state;
+        }
+        else {
+            /* Reached. Clear the mark again. */
+            state->stateBits &= ~ SB_ISMARKED;
+        }
+    }
+}
+
+/* Returns true if the out transitions of state cover the whole key range
+ * from keyOps->minKey to keyOps->maxKey without leaving a gap. */
+bool FsmAp::outListCovers( StateAp *state )
+{
+    /* An empty out list covers nothing. */
+    if ( state->outList.length() == 0 )
+        return false;
+
+    /* The first range must not start above the lower bound. */
+    TransList::Iter trans = state->outList.first();
+    if ( keyOps->minKey < trans->lowKey )
+        return false;
+
+    /* From the second range on, the key just below the low end of a range
+     * must not lie above the high end of the range before it. */
+    for ( trans.increment(); trans.lte(); trans++ ) {
+        Key oneBelow = trans->lowKey;
+        oneBelow.decrement();
+        if ( trans->prev->highKey < oneBelow )
+            return false;
+    }
+
+    /* The last range must extend to the upper bound. */
+    trans = state->outList.last();
+    return !( trans->highKey < keyOps->maxKey );
+}
+
+/* Remove states that do not lead to a final state. Marks backwards from
+ * every final state, then deletes every state that did not get marked and
+ * clears the mark on the ones that stay. */
+void FsmAp::removeDeadEndStates()
+{
+    /* Misfit accounting should be off and there should be no states on the
+     * misfit list. */
+    assert( !misfitAccounting && misfitList.length() == 0 );
+
+    /* Mark all states that have paths to the final states. */
+    StateAp **finals = finStateSet.data;
+    const int numFinals = finStateSet.length();
+    for ( int i = 0; i < numFinals; i++ )
+        markReachableFromHereReverse( finals[i] );
+
+    /* The start state is kept even when the machine accepts nothing. It is
+     * marked only after the traversal so that the traversal does not treat
+     * it as already visited and skip its in transitions. */
+    startState->stateBits |= SB_ISMARKED;
+
+    /* Sweep the state list. The successor is saved up front because the
+     * current state may be deleted. */
+    for ( StateAp *state = stateList.head, *next = 0; state != 0; state = next ) {
+        next = state->next;
+
+        if ( ( state->stateBits & SB_ISMARKED ) == 0 ) {
+            /* No path to a final state. Take it out of the graph and free
+             * it. */
+            detachState( state );
+            stateList.detach( state );
+            delete state;
+        }
+        else {
+            /* Kept. Clear the mark again. */
+            state->stateBits &= ~ SB_ISMARKED;
+        }
+    }
+}
+
+/* Empty the misfit list, deleting every state on it. Misfit accounting
+ * should be on when this is called: detaching one state will likely put
+ * further states onto the misfit list, and those are removed too. */
+void FsmAp::removeMisfits()
+{
+    while ( misfitList.length() > 0 ) {
+        /* Always work on the state at the head of the list. */
+        StateAp *misfit = misfitList.head;
+
+        /* Take it out of the graph. */
+        detachState( misfit );
+
+        /* Detaching only removes in transitions of the state, so the state
+         * itself is still on the misfit list at this point. */
+        misfitList.detach( misfit );
+        delete misfit;
+    }
+}
+
+/* Fuse src into dest because the two have been deemed equivalent. The
+ * transitions going into src are redirected to dest, then src is detached
+ * from the graph, removed from the state list and deleted. */
+void FsmAp::fuseEquivStates( StateAp *dest, StateAp *src )
+{
+    /* Fusing a state into itself would delete the state being kept. */
+    assert( dest != src );
+
+    /* Everything that pointed at src now points at dest. */
+    inTransMove( dest, src );
+
+    /* src is no longer needed. Drop it from the graph and free it. */
+    detachState( src );
+    stateList.detach( src );
+    delete src;
+}
+
+/* Merge every state into the primary state of its equivalence class, using
+ * the pairs that were left unmarked in markIndex. */
+void FsmAp::fuseUnmarkedPairs( MarkIndex &markIndex )
+{
+    /* Definition: the primary state of an equivalence class is the first
+     * state encountered on the state list that belongs to the class. Every
+     * class has a primary state, including classes with a single state.
+     *
+     * For each p the first earlier state q that forms an unmarked pair with
+     * p is found and p is fused into q. That q is always the primary state
+     * of its class. If it were not, q itself would already have been fused
+     * into an even earlier state and deleted, so the walk could not have
+     * landed on it. */
+
+    /* For each state p visit the states q that come before it on the state
+     * list. The successor of p is saved up front because p may be deleted. */
+    for ( StateAp *p = stateList.head, *nextP = 0; p != 0; p = nextP ) {
+        nextP = p->next;
+
+        for ( StateAp *q = stateList.head; q != p; q = q->next ) {
+            /* An unmarked pair is an equivalent pair. Fuse p into q and
+             * move on to the next p. */
+            if ( ! markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) {
+                fuseEquivStates( q, p );
+                break;
+            }
+        }
+    }
+}
+
+/* Fuse the states of each of the numParts partitions in parts into the first
+ * state of the partition. The surviving states are appended to the main
+ * state list and the partition lists are abandoned. */
+void FsmAp::fusePartitions( MinPartition *parts, int numParts )
+{
+ /* For each partition, fuse state 2, 3, ... into state 1. */
+ for ( int p = 0; p < numParts; p++ ) {
+ /* Assume that there will always be at least one state. The second
+ * state is picked up here, before the first is appended to the main
+ * list below. */
+ StateAp *first = parts[p].list.head, *toFuse = first->next;
+
+ /* Put the first state back onto the main state list. Don't bother
+ * removing it from the partition list first. */
+ stateList.append( first );
+
+ /* Fuse the rest of the states into the first. */
+ while ( toFuse != 0 ) {
+ /* Save the next. We will trash it before it is needed. */
+ StateAp *next = toFuse->next;
+
+ /* Put the state to be fused into the first back onto the main
+ * list before it is fused. The state needs to be on the main
+ * list for the detach from the graph to work. Don't bother
+ * removing the state from the partition list first. We need
+ * not maintain it. */
+ stateList.append( toFuse );
+
+ /* Now fuse to the first. */
+ fuseEquivStates( first, toFuse );
+
+ /* Go to the next that we saved before trashing the next pointer. */
+ toFuse = next;
+ }
+
+ /* We transferred the states from the partition list into the main list without
+ * removing the states from the partition list first. Clean it up. */
+ parts[p].list.abandon();
+ }
+}
+
+
+/* Merge neighboring transitions that go to the same state and have the same
+ * action table. Two neighbors are merged only when the second starts
+ * directly after the high key of the first. */
+void FsmAp::compressTransitions()
+{
+ for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+ /* Merging needs at least two transitions. */
+ if ( st->outList.length() > 1 ) {
+ /* trans and next always refer to two neighboring transitions. */
+ for ( TransList::Iter trans = st->outList, next = trans.next(); next.lte(); ) {
+ /* The key just below the start of next. */
+ Key nextLow = next->lowKey;
+ nextLow.decrement();
+ if ( trans->highKey == nextLow && trans->toState == next->toState &&
+ CmpActionTable::compare( trans->actionTable, next->actionTable ) == 0 )
+ {
+ /* Extend trans over the range of next, then unlink next from
+ * the out list, detach it from the graph and free it. */
+ trans->highKey = next->highKey;
+ st->outList.detach( next );
+ detachTrans( next->fromState, next->toState, next );
+ delete next;
+ /* Re-derive next from trans so that the enlarged trans is
+ * compared against its new neighbor. */
+ next = trans.next();
+ }
+ else {
+ /* No merge. Move both iterators forward by one. */
+ trans.increment();
+ next.increment();
+ }
+ }
+ }
+ }
+}
diff --git a/ragel/fsmstate.cpp b/ragel/fsmstate.cpp
new file mode 100644
index 0000000..4322c10
--- /dev/null
+++ b/ragel/fsmstate.cpp
@@ -0,0 +1,463 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <string.h>
+#include <assert.h>
+#include "fsmgraph.h"
+
+#include <iostream>
+using namespace std;
+
+/* Construct a mark index for a specified number of states. Allocates an
+ * array of states^2 flags, one per ordered pair of state numbers, with all
+ * of them initially clear. */
+MarkIndex::MarkIndex( int states ) : numStates(states)
+{
+    /* Total pairs is states^2. Only half of these are actually used, but
+     * they are all allocated to make indexing into the array easier. The
+     * count is computed in size_t so that it does not overflow int when the
+     * number of states is large. */
+    size_t total = (size_t)states * (size_t)states;
+
+    /* Plain bools have no constructor that clears them, so zero out the
+     * memory manually. */
+    array = new bool[total];
+    memset( array, 0, sizeof(bool) * total );
+}
+
+/* Release the array of pair flags. */
+MarkIndex::~MarkIndex()
+{
+    delete[] array;
+}
+
+/* Mark a pair of states. States are specified by their number. The order
+ * in which the two states are given does not matter. */
+void MarkIndex::markPair(int state1, int state2)
+{
+    /* The larger state number selects the row, the smaller one the column.
+     * The position is computed in size_t so that row * numStates does not
+     * overflow int when the number of states is large. */
+    size_t row = ( state1 >= state2 ) ? state1 : state2;
+    size_t col = ( state1 >= state2 ) ? state2 : state1;
+
+    array[ row * (size_t)numStates + col ] = true;
+}
+
+/* Returns true if the pair of states is marked. Returns false otherwise.
+ * The order in which the two states are given does not matter. */
+bool MarkIndex::isPairMarked(int state1, int state2)
+{
+    /* The larger state number selects the row, the smaller one the column.
+     * The position is computed in size_t so that row * numStates does not
+     * overflow int when the number of states is large. */
+    size_t row = ( state1 >= state2 ) ? state1 : state2;
+    size_t col = ( state1 >= state2 ) ? state2 : state1;
+
+    return array[ row * (size_t)numStates + col ];
+}
+
+/* Create a new fsm state. Every list, set and table member is default
+ * constructed, the pointer members are null, and the foreign in transition
+ * count and the state bits are zero. */
+StateAp::StateAp()
+:
+ /* No out or in transitions. */
+ outList(),
+ inList(),
+
+ /* No entry points, or epsilon trans. */
+ entryIds(),
+ epsilonTrans(),
+
+ /* Conditions. */
+ stateCondList(),
+
+ /* No transitions in from other states. */
+ foreignInTrans(0),
+
+ /* Only used during merging. Normally null. */
+ stateDictEl(0),
+ eptVect(0),
+
+ /* No state identification bits. */
+ stateBits(0),
+
+ /* No Priority data. */
+ outPriorTable(),
+
+ /* No Action data. */
+ toStateActionTable(),
+ fromStateActionTable(),
+ outActionTable(),
+ outCondSet(),
+ errActionTable(),
+ eofActionTable()
+{
+}
+
+/* Copy a state. The state data is copied from other. The out transitions
+ * are duplicated as well, but each duplicate still points at the target
+ * state of the original transition and is only appended to this state's out
+ * list. The in list starts empty. NOTE(review): the comments here say the
+ * targets are corrected later, presumably by the FsmAp copy constructor;
+ * confirm. */
+StateAp::StateAp(const StateAp &other)
+:
+ /* All lists are cleared. They will be filled in when the
+ * individual transitions are duplicated and attached. */
+ outList(),
+ inList(),
+
+ /* Duplicate the entry id set and epsilon transitions. These
+ * are sets of integers and as such need no fixing. */
+ entryIds(other.entryIds),
+ epsilonTrans(other.epsilonTrans),
+
+ /* Copy in the elements of the conditions. */
+ stateCondList( other.stateCondList ),
+
+ /* No transitions in from other states. */
+ foreignInTrans(0),
+
+ /* This is only used during merging. Normally null. */
+ stateDictEl(0),
+ eptVect(0),
+
+ /* Fsm state data. */
+ stateBits(other.stateBits),
+
+ /* Copy in priority data. */
+ outPriorTable(other.outPriorTable),
+
+ /* Copy in action data. */
+ toStateActionTable(other.toStateActionTable),
+ fromStateActionTable(other.fromStateActionTable),
+ outActionTable(other.outActionTable),
+ outCondSet(other.outCondSet),
+ errActionTable(other.errActionTable),
+ eofActionTable(other.eofActionTable)
+{
+ /* Duplicate all the transitions. */
+ for ( TransList::Iter trans = other.outList; trans.lte(); trans++ ) {
+ /* Duplicate and store the original target in the transition. This will
+ * be corrected once all the states have been created. */
+ TransAp *newTrans = new TransAp(*trans);
+ newTrans->toState = trans->toState;
+ outList.append( newTrans );
+ }
+}
+
+/* Delete the state dict element, if there is one. Nothing else is cleaned
+ * up here. */
+StateAp::~StateAp()
+{
+    /* Deleting a null pointer does nothing, so no explicit check is
+     * needed. */
+    delete stateDictEl;
+}
+
+/* Compare two states using pointers to the states. With the approximate
+ * compare the idea is that if the compare finds them the same, they can
+ * immediately be merged. Returns a negative value, zero or a positive
+ * value. */
+int ApproxCompare::compare( const StateAp *state1 , const StateAp *state2 )
+{
+    /* A final state orders before a non-final state. */
+    const bool final1 = ( state1->stateBits & SB_ISFINAL ) != 0;
+    const bool final2 = ( state2->stateBits & SB_ISFINAL ) != 0;
+    if ( final1 != final2 )
+        return final1 ? -1 : 1;
+
+    /* Test epsilon transition sets. */
+    int res = CmpEpsilonTrans::compare( state1->epsilonTrans,
+            state2->epsilonTrans );
+    if ( res != 0 )
+        return res;
+
+    /* Compare the data attached to the states. */
+    res = FsmAp::compareStateData( state1, state2 );
+    if ( res != 0 )
+        return res;
+
+    /* Walk the out transitions of both states in parallel and compare each
+     * pair in full. A range present in only one state is compared against a
+     * null transition. */
+    PairIter<TransAp> outPair( state1->outList.head, state2->outList.head );
+    for ( ; !outPair.end(); outPair++ ) {
+        res = 0;
+        switch ( outPair.userState ) {
+            case RangeInS1:
+                res = FsmAp::compareFullPtr( outPair.s1Tel.trans, 0 );
+                break;
+            case RangeInS2:
+                res = FsmAp::compareFullPtr( 0, outPair.s2Tel.trans );
+                break;
+            case RangeOverlap:
+                res = FsmAp::compareFullPtr(
+                        outPair.s1Tel.trans, outPair.s2Tel.trans );
+                break;
+            case BreakS1:
+            case BreakS2:
+                break;
+        }
+
+        if ( res != 0 )
+            return res;
+    }
+
+    /* Got through the entire state comparison, deem them equal. */
+    return 0;
+}
+
+/* Compare used by the sort that does the initial partitioning. Orders states
+ * by final state status, epsilon transitions, state data, conditions and
+ * transition data. Returns a negative value, zero or a positive value. */
+int InitPartitionCompare::compare( const StateAp *state1 , const StateAp *state2 )
+{
+    /* A final state orders before a non-final state. */
+    const bool final1 = ( state1->stateBits & SB_ISFINAL ) != 0;
+    const bool final2 = ( state2->stateBits & SB_ISFINAL ) != 0;
+    if ( final1 != final2 )
+        return final1 ? -1 : 1;
+
+    /* Test epsilon transition sets. */
+    int res = CmpEpsilonTrans::compare( state1->epsilonTrans,
+            state2->epsilonTrans );
+    if ( res != 0 )
+        return res;
+
+    /* Compare the data attached to the states. */
+    res = FsmAp::compareStateData( state1, state2 );
+    if ( res != 0 )
+        return res;
+
+    /* Walk the condition lists of both states in parallel. A range that
+     * exists in only one of the states decides the order right away,
+     * overlapping ranges are ordered by their condition space pointers. */
+    PairIter<StateCond> condPair( state1->stateCondList.head, state2->stateCondList.head );
+    for ( ; !condPair.end(); condPair++ ) {
+        switch ( condPair.userState ) {
+            case RangeInS1:
+                return 1;
+            case RangeInS2:
+                return -1;
+
+            case RangeOverlap: {
+                CondSpace *space1 = condPair.s1Tel.trans->condSpace;
+                CondSpace *space2 = condPair.s2Tel.trans->condSpace;
+                if ( space1 < space2 )
+                    return -1;
+                if ( space1 > space2 )
+                    return 1;
+                break;
+            }
+            case BreakS1:
+            case BreakS2:
+                break;
+        }
+    }
+
+    /* Walk the out transitions of both states in parallel and compare the
+     * data of each pair. A range present in only one state is compared
+     * against a null transition. */
+    PairIter<TransAp> outPair( state1->outList.head, state2->outList.head );
+    for ( ; !outPair.end(); outPair++ ) {
+        res = 0;
+        switch ( outPair.userState ) {
+            case RangeInS1:
+                res = FsmAp::compareDataPtr( outPair.s1Tel.trans, 0 );
+                break;
+            case RangeInS2:
+                res = FsmAp::compareDataPtr( 0, outPair.s2Tel.trans );
+                break;
+            case RangeOverlap:
+                res = FsmAp::compareDataPtr(
+                        outPair.s1Tel.trans, outPair.s2Tel.trans );
+                break;
+            case BreakS1:
+            case BreakS2:
+                break;
+        }
+
+        if ( res != 0 )
+            return res;
+    }
+
+    return 0;
+}
+
+/* Compare used by the sort that does the partitioning. Walks the out
+ * transitions of both states in parallel and compares each pair with
+ * FsmAp::comparePartPtr(). Returns the first non-zero result, or zero if
+ * there is none. */
+int PartitionCompare::compare( const StateAp *state1, const StateAp *state2 )
+{
+    /* Use a pair iterator to get the transition pairs. A range present in
+     * only one state is compared against a null transition. */
+    PairIter<TransAp> outPair( state1->outList.head, state2->outList.head );
+    for ( ; !outPair.end(); outPair++ ) {
+        int res = 0;
+        switch ( outPair.userState ) {
+            case RangeInS1:
+                res = FsmAp::comparePartPtr( outPair.s1Tel.trans, 0 );
+                break;
+            case RangeInS2:
+                res = FsmAp::comparePartPtr( 0, outPair.s2Tel.trans );
+                break;
+            case RangeOverlap:
+                res = FsmAp::comparePartPtr(
+                        outPair.s1Tel.trans, outPair.s2Tel.trans );
+                break;
+            case BreakS1:
+            case BreakS2:
+                break;
+        }
+
+        if ( res != 0 )
+            return res;
+    }
+
+    return 0;
+}
+
+/* Decide whether the pair (state1, state2) must be marked. The out
+ * transition lists are walked range by range; the pair is marked as soon as
+ * FsmAp::shouldMarkPtr reports true for the transitions of some range. */
+bool MarkCompare::shouldMark( MarkIndex &markIndex, const StateAp *state1,
+		const StateAp *state2 )
+{
+	for ( PairIter<TransAp> pair( state1->outList.head, state2->outList.head );
+			!pair.end(); pair++ )
+	{
+		/* A side with no transition in this range is passed as null. */
+		TransAp *left = 0, *right = 0;
+		switch ( pair.userState ) {
+		case RangeInS1:
+			left = pair.s1Tel.trans;
+			break;
+		case RangeInS2:
+			right = pair.s2Tel.trans;
+			break;
+		case RangeOverlap:
+			left = pair.s1Tel.trans;
+			right = pair.s2Tel.trans;
+			break;
+		case BreakS1:
+		case BreakS2:
+		default:
+			/* Nothing to test for this step. */
+			continue;
+		}
+
+		if ( FsmAp::shouldMarkPtr( markIndex, left, right ) )
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Transition Comparison.
+ */
+
+/* Compare the partitions that two transitions lead to. When trans1 is null
+ * the result is zero. When trans1 is set, trans2 is dereferenced without a
+ * check: the initial partitioning is relied upon to guarantee that it is set
+ * too. A transition without a target orders before one with a target. */
+int FsmAp::comparePartPtr( TransAp *trans1, TransAp *trans2 )
+{
+	if ( trans1 == 0 )
+		return 0;
+
+	const bool hasTarget1 = trans1->toState != 0;
+	const bool hasTarget2 = trans2->toState != 0;
+
+	/* Exactly one side has a target. */
+	if ( hasTarget1 != hasTarget2 )
+		return hasTarget1 ? 1 : -1;
+
+	/* Neither side has a target. */
+	if ( !hasTarget1 )
+		return 0;
+
+	/* Both targets are set, order by their partition pointers. */
+	return CmpOrd< MinPartition* >::compare(
+			trans1->toState->alg.partition, trans2->toState->alg.partition );
+}
+
+
+/* Order two transition pointers by their transition data (the result of
+ * compareTransData). Either pointer may be null: a null transition orders
+ * before a non-null one and two nulls are equal. The to state and from state
+ * are not considered. */
+int FsmAp::compareDataPtr( TransAp *trans1, TransAp *trans2 )
+{
+	if ( trans1 == 0 )
+		return trans2 == 0 ? 0 : -1;
+	if ( trans2 == 0 )
+		return 1;
+
+	/* Both of the transition pointers are set. */
+	return compareTransData( trans1, trans2 );
+}
+
+/* Order two transitions by target state first and, when both lead to the
+ * same non-null target, by transition data. The from state is not
+ * considered. Either pointer may be null; here a set transition orders
+ * before a null one. */
+int FsmAp::compareFullPtr( TransAp *trans1, TransAp *trans2 )
+{
+	if ( trans1 == 0 )
+		return trans2 == 0 ? 0 : 1;
+	if ( trans2 == 0 )
+		return -1;
+
+	/* Both set: the target state pointer is the primary key. */
+	if ( trans1->toState < trans2->toState )
+		return -1;
+	if ( trans1->toState > trans2->toState )
+		return 1;
+
+	/* Same target. Without a target there is nothing further to test. */
+	if ( trans1->toState == 0 )
+		return 0;
+
+	return compareTransData( trans1, trans2 );
+}
+
+
+/* Report whether the state pair owning trans1 and trans2 should be marked:
+ * true when both transitions are set and the pair of their target states is
+ * already marked in markIndex. */
+bool FsmAp::shouldMarkPtr( MarkIndex &markIndex, TransAp *trans1,
+		TransAp *trans2 )
+{
+	/* Neither of the transitions is set. */
+	if ( trans1 == 0 && trans2 == 0 )
+		return false;
+
+	if ( trans1 == 0 || trans2 == 0 ) {
+		/* Exactly one of the transitions is set. The initial mark round
+		 * should rule out this case. */
+		assert( false );
+		return false;
+	}
+
+	/* Both are set: the pair is marked if the target pair is marked. */
+	return markIndex.isPairMarked( trans1->toState->alg.stateNum,
+			trans2->toState->alg.stateNum );
+}
+
+
diff --git a/ragel/main.cpp b/ragel/main.cpp
new file mode 100644
index 0000000..41d6e6a
--- /dev/null
+++ b/ragel/main.cpp
@@ -0,0 +1,339 @@
+/*
+ * Copyright 2001-2005 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <iostream>
+#include <fstream>
+#include <unistd.h>
+#include <sstream>
+
+/* Parsing. */
+#include "ragel.h"
+
+/* Parameters and output. */
+#include "pcheck.h"
+#include "vector.h"
+#include "version.h"
+
+#include "common.cpp"
+
+using std::istream;
+using std::ostream;
+using std::ifstream;
+using std::ofstream;
+using std::cin;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+/* Io globals. main() points inStream at the opened input file or at cin and
+ * points outStream at an in-memory string buffer. outputFileName is taken
+ * from the -o option and stays null when the option is absent. */
+istream *inStream = 0;
+ostream *outStream = 0;
+char *outputFileName = 0;
+
+/* Controls minimization. minimizeLevel is selected by -a, -b, -j and -k;
+ * minimizeOpt is selected by -n, -m, -l and -e. */
+MinimizeLevel minimizeLevel = MinimizePartition2;
+MinimizeOpt minimizeOpt = MinimizeMostOps;
+
+/* Graphviz dot file generation. machineSpec is taken from -S and machineName
+ * from -M. When both are null main() wraps the output in a <host> element. */
+char *machineSpec = 0, *machineName = 0;
+bool machineSpecFound = false;
+
+/* Set by the -s option. */
+bool printStatistics = false;
+
+/* Print a summary of the options on standard output. The minimization level
+ * options that main() also accepts (-a, -b, -j, -k) are not listed here.
+ * NOTE(review): the text refers to -V, which is not in the option string
+ * passed to ParamCheck in main() -- confirm the wording is intended. */
+void usage()
+{
+ cout <<
+"usage: ragel [options] file\n"
+"general:\n"
+" -h, -H, -?, --help Print this usage and exit\n"
+" -v, --version Print version information and exit\n"
+" -o <file> Write output to <file>\n"
+" -s Print some statistics on stderr\n"
+"fsm minimization:\n"
+" -n Do not perform minimization\n"
+" -m Minimize at the end of the compilation\n"
+" -l Minimize after most operations (default)\n"
+" -e Minimize after every operation\n"
+"machine selection:\n"
+" -S <spec> FSM specification to output for -V\n"
+" -M <machine> Machine definition/instantiation to output for -V\n"
+"host language:\n"
+" -C The host language is C, C++, Obj-C or Obj-C++ (default)\n"
+" -D The host language is D\n"
+" -J The host language is Java\n"
+ ;
+}
+
+/* Write the program name, version, publication date and the copyright line
+ * to standard output. */
+void version()
+{
+	cout << "Ragel State Machine Compiler version " VERSION << " " PUBDATE << endl;
+	cout << "Copyright (c) 2001-2006 by Adrian Thurston" << endl;
+}
+
+/* Global parse data pointer. */
+//extern InputData *id;
+
+/* Total error count. */
+int gblErrorCount = 0;
+
+/* Print the opening to a program error, then return the error stream. */
+ostream &error()
+{
+ /* Keep the error count. */
+// if ( id != 0 && id->pd != 0 )
+// id->pd->errorCount += 1;
+ gblErrorCount += 1;
+
+ cerr << PROGNAME ": ";
+ return cerr;
+}
+
+/* Return the stream that warnings are written to (cerr). The file name
+ * prefix is commented out, so nothing is printed by this call itself. */
+ostream &warning( )
+{
+// cerr << id->fileName << ": warning: ";
+ return cerr;
+}
+
+/* Return the stream that warnings about the input are written to (cerr).
+ * The "file:line:col: warning: " prefix is commented out, so loc is unused
+ * and nothing is printed by this call itself. */
+ostream &warning( const InputLoc &loc )
+{
+// cerr << id->fileName << ":" << loc.line << ":" <<
+// loc.col << ": warning: ";
+ return cerr;
+}
+
+/* Write the null terminated string path to out, emitting every backslash as
+ * two backslashes and every other character unchanged. */
+void escapeLineDirectivePath( std::ostream &out, char *path )
+{
+	char *cursor = path;
+	while ( *cursor != 0 ) {
+		const char ch = *cursor++;
+		if ( ch != '\\' )
+			out << ch;
+		else
+			out << "\\\\";
+	}
+}
+
+/* Program entry point. Parses the command line, opens the input (a file or
+ * standard input), scans it with the host-language output collected in an
+ * in-memory buffer, terminates the parsers, checks the machines and finally
+ * writes the machines and surrounding code to the output file or to standard
+ * output. Returns (or exits with) 1 as soon as any error has been counted in
+ * gblErrorCount, and 0 on success. */
+int main(int argc, char **argv)
+{
+	ParamCheck pc("o:nmleabjkS:M:CDJvHh?-:s", argc, argv);
+	char *inputFileName = 0;
+
+	/* Name reported for the input when reading standard input. Kept in a
+	 * writable array because inputFileName is a non-const char pointer. */
+	static char stdinName[] = "<stdin>";
+
+	while ( pc.check() ) {
+		switch ( pc.state ) {
+		case ParamCheck::match:
+			switch ( pc.parameter ) {
+			/* Output. */
+			case 'o':
+				if ( *pc.parameterArg == 0 )
+					error() << "a zero length output file name was given" << endl;
+				else if ( outputFileName != 0 )
+					error() << "more than one output file name was given" << endl;
+				else {
+					/* Ok, remember the output file name. */
+					outputFileName = pc.parameterArg;
+				}
+				break;
+
+			/* Minimization, mostly hidden options. */
+			case 'n':
+				minimizeOpt = MinimizeNone;
+				break;
+			case 'm':
+				minimizeOpt = MinimizeEnd;
+				break;
+			case 'l':
+				minimizeOpt = MinimizeMostOps;
+				break;
+			case 'e':
+				minimizeOpt = MinimizeEveryOp;
+				break;
+			case 'a':
+				minimizeLevel = MinimizeApprox;
+				break;
+			case 'b':
+				minimizeLevel = MinimizeStable;
+				break;
+			case 'j':
+				minimizeLevel = MinimizePartition1;
+				break;
+			case 'k':
+				minimizeLevel = MinimizePartition2;
+				break;
+
+			/* Machine spec. */
+			case 'S':
+				if ( *pc.parameterArg == 0 )
+					error() << "please specify an argument to -S" << endl;
+				else if ( machineSpec != 0 )
+					error() << "more than one -S argument was given" << endl;
+				else {
+					/* Ok, remember the path to the machine to generate. */
+					machineSpec = pc.parameterArg;
+				}
+				break;
+
+			/* Machine path. */
+			case 'M':
+				if ( *pc.parameterArg == 0 )
+					error() << "please specify an argument to -M" << endl;
+				else if ( machineName != 0 )
+					error() << "more than one -M argument was given" << endl;
+				else {
+					/* Ok, remember the machine name to generate. */
+					machineName = pc.parameterArg;
+				}
+				break;
+
+			/* Host language types. */
+			case 'C':
+				hostLangType = CCode;
+				hostLang = &hostLangC;
+				break;
+			case 'D':
+				hostLangType = DCode;
+				hostLang = &hostLangD;
+				break;
+			case 'J':
+				hostLangType = JavaCode;
+				hostLang = &hostLangJava;
+				break;
+
+			/* Version and help. */
+			case 'v':
+				version();
+				exit(0);
+			case 'H': case 'h': case '?':
+				usage();
+				exit(0);
+			case 's':
+				printStatistics = true;
+				break;
+
+			/* Long options arrive as the argument of '-'. */
+			case '-':
+				if ( strcasecmp(pc.parameterArg, "help") == 0 ) {
+					usage();
+					exit(0);
+				}
+				else if ( strcasecmp(pc.parameterArg, "version") == 0 ) {
+					version();
+					exit(0);
+				}
+				else {
+					error() << "--" << pc.parameterArg <<
+							" is an invalid argument" << endl;
+				}
+				break;
+			}
+			break;
+
+		case ParamCheck::invalid:
+			error() << "-" << pc.parameter << " is an invalid argument" << endl;
+			break;
+
+		case ParamCheck::noparam:
+			/* It is interpreted as an input file. */
+			if ( *pc.curArg == 0 )
+				error() << "a zero length input file name was given" << endl;
+			else if ( inputFileName != 0 )
+				error() << "more than one input file name was given" << endl;
+			else {
+				/* OK, Remember the filename. */
+				inputFileName = pc.curArg;
+			}
+			break;
+		}
+	}
+
+	/* Bail on above errors. */
+	if ( gblErrorCount > 0 )
+		exit(1);
+
+	/* Make sure we are not writing to the same file as the input file. */
+	if ( inputFileName != 0 && outputFileName != 0 &&
+			strcmp( inputFileName, outputFileName ) == 0 )
+	{
+		error() << "output file \"" << outputFileName <<
+				"\" is the same as the input file" << endl;
+	}
+
+	/* Open the input file for reading, or fall back to standard input. */
+	if ( inputFileName != 0 ) {
+		ifstream *inFile = new ifstream( inputFileName );
+		inStream = inFile;
+		if ( ! inFile->is_open() )
+			error() << "could not open " << inputFileName << " for reading" << endl;
+	}
+	else {
+		inputFileName = stdinName;
+		inStream = &cin;
+	}
+
+	/* Bail on above errors. */
+	if ( gblErrorCount > 0 )
+		exit(1);
+
+	/* The host code is collected in memory and only written out at the end,
+	 * together with the machines. */
+	std::ostringstream outputBuffer;
+	outStream = &outputBuffer;
+
+	if ( machineSpec == 0 && machineName == 0 )
+		*outStream << "<host line=\"1\" col=\"1\">";
+
+	scan( inputFileName, *inStream );
+
+	/* Finished, final check for errors.. */
+	if ( gblErrorCount > 0 )
+		return 1;
+
+	/* Now send EOF to all parsers. */
+	terminateAllParsers();
+
+	/* Finished, final check for errors.. */
+	if ( gblErrorCount > 0 )
+		return 1;
+
+	if ( machineSpec == 0 && machineName == 0 )
+		*outStream << "</host>\n";
+
+	checkMachines();
+
+	if ( gblErrorCount > 0 )
+		return 1;
+
+	/* Select the destination. A named output file that cannot be opened is
+	 * an error; without this check the output would be silently lost and
+	 * the program would still exit with status 0. */
+	ostream *outputFile = &cout;
+	if ( outputFileName != 0 ) {
+		ofstream *outFile = new ofstream( outputFileName );
+		if ( ! outFile->is_open() ) {
+			error() << "could not open " << outputFileName << " for writing" << endl;
+			delete outFile;
+			return 1;
+		}
+		outputFile = outFile;
+	}
+
+	/* Write the machines, then the surrounding code. */
+	writeMachines( *outputFile, outputBuffer.str(), inputFileName );
+
+	if ( outputFileName != 0 )
+		delete outputFile;
+
+	return 0;
+}
diff --git a/ragel/parsedata.cpp b/ragel/parsedata.cpp
new file mode 100644
index 0000000..3209e28
--- /dev/null
+++ b/ragel/parsedata.cpp
@@ -0,0 +1,1432 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <iostream>
+#include <iomanip>
+#include <errno.h>
+#include <stdlib.h>
+#include <limits.h>
+
+#include "ragel.h"
+#include "rlparse.h"
+#include "parsedata.h"
+#include "parsetree.h"
+#include "mergesort.h"
+#include "xmlcodegen.h"
+
+using namespace std;
+
+char machineMain[] = "main";
+
+/* Point the token at a freshly allocated, null terminated copy of the first
+ * len characters of str and record len as its length. */
+void Token::set( char *str, int len )
+{
+	char *copy = new char[len+1];
+	memcpy( copy, str, len );
+	copy[len] = 0;
+
+	data = copy;
+	length = len;
+}
+
+/* Replace this token's text with a freshly allocated, null terminated
+ * concatenation of its current text and the text of other.
+ * NOTE(review): the previous buffer is not released here; whether Token owns
+ * its data is not visible from this function -- confirm before adding a
+ * delete[]. */
+void Token::append( const Token &other )
+{
+	const int total = length + other.length;
+	char *joined = new char[total+1];
+
+	memcpy( joined, data, length );
+	memcpy( joined + length, other.data, other.length );
+	joined[total] = 0;
+
+	data = joined;
+	length = total;
+}
+
+/* Minimize fsm after an operation, as directed by the command line. The
+ * machine is minimized when minimizeOpt is MinimizeEveryOp, or when it is
+ * MinimizeMostOps and lastInSeq is set. The algorithm is chosen by
+ * minimizeLevel. */
+void afterOpMinimize( FsmAp *fsm, bool lastInSeq )
+{
+	const bool minimizeNow = minimizeOpt == MinimizeEveryOp ||
+			( minimizeOpt == MinimizeMostOps && lastInSeq );
+	if ( !minimizeNow )
+		return;
+
+	/* First clean up the graph. FsmAp operations may leave unreachable
+	 * states lying around. There should be no dead end states: the subtract
+	 * and intersection operators are the only places where they may be
+	 * created and those operators clean them up. */
+	fsm->removeUnreachableStates();
+
+	/* Dispatch on the preferred minimization algorithm. */
+	if ( minimizeLevel == MinimizeApprox )
+		fsm->minimizeApproximate();
+	else if ( minimizeLevel == MinimizePartition1 )
+		fsm->minimizePartition1();
+	else if ( minimizeLevel == MinimizePartition2 )
+		fsm->minimizePartition2();
+	else if ( minimizeLevel == MinimizeStable )
+		fsm->minimizeStable();
+}
+
+/* Sum the lengths of the out transition lists of every state on the state
+ * list of fsm. */
+int countTransitions( FsmAp *fsm )
+{
+	int total = 0;
+	for ( StateAp *st = fsm->stateList.head; st != 0; st = st->next )
+		total += st->outList.length();
+	return total;
+}
+
+/* Convert a hexadecimal literal to an fsm key. The literal is parsed with
+ * strtoul. If the parse overflows, or the value has bits set above the width
+ * of the alphabet type, an error is reported at loc. For a signed alphabet
+ * type narrower than unsigned long, a value with the alphabet's sign bit set
+ * is sign extended to the full width of long before the key is made.
+ * The pd argument is not used. */
+Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd )
+{
+	/* Reset errno so we can check for overflow. */
+	errno = 0;
+	unsigned int size = keyOps->alphType->size;
+
+	/* True when unsigned long has bits above the alphabet type. All shifts
+	 * by size*8 below are only performed in this case, otherwise the shift
+	 * count would reach the width of the operand. */
+	bool unusedBits = size < sizeof(unsigned long);
+
+	unsigned long ul = strtoul( str, 0, 16 );
+
+	if ( errno == ERANGE || ( unusedBits && ul >> (size * 8) ) ) {
+		error(loc) << "literal " << str << " overflows the alphabet type" << endl;
+
+		/* Shift an unsigned long, not an int: size*8 may be 32 or more.
+		 * When there are no unused bits the value strtoul produced on
+		 * overflow is kept. */
+		if ( unusedBits )
+			ul = 1UL << (size * 8);
+	}
+
+	/* Sign extend across the whole of unsigned long. A 32 bit mask would
+	 * leave the upper half clear where long is wider than 32 bits. */
+	if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) )
+		ul |= ~0UL << (size * 8);
+
+	return Key( (long)ul );
+}
+
+/* Convert a decimal literal to an fsm key. The literal is parsed with
+ * strtoll and checked against the minimum and maximum values of the alphabet
+ * type. On underflow or overflow an error is reported at loc and the value
+ * is replaced by the bound that was exceeded. The pd argument is not used. */
+Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd )
+{
+	const long long minVal = keyOps->alphType->minVal;
+	const long long maxVal = keyOps->alphType->maxVal;
+
+	/* Reset errno first so a range error from the conversion is visible. */
+	errno = 0;
+	long long ll = strtoll( str, 0, 10 );
+	const bool rangeError = ( errno == ERANGE );
+
+	if ( ( rangeError && ll < 0 ) || ll < minVal ) {
+		/* Underflow. */
+		error(loc) << "literal " << str << " underflows the alphabet type" << endl;
+		ll = minVal;
+	}
+	else if ( ( rangeError && ll > 0 ) || ll > maxVal ) {
+		/* Overflow. */
+		error(loc) << "literal " << str << " overflows the alphabet type" << endl;
+		ll = maxVal;
+	}
+
+	return keyOps->alphType->isSigned ?
+			Key( (long)ll ) : Key( (unsigned long)ll );
+}
+
+/* Make an fsm key from an alphabet number returned by the parser. A string
+ * starting with "0x" is handed to makeFsmKeyHex, anything else to
+ * makeFsmKeyDec; those functions validate the number against the alphabet
+ * type. */
+Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd )
+{
+	const bool isHex = ( str[0] == '0' && str[1] == 'x' );
+	return isHex ? makeFsmKeyHex( str, loc, pd ) : makeFsmKeyDec( str, loc, pd );
+}
+
+/* Make an fsm key from a single character. When keyOps->isSigned is set the
+ * character is used as a plain char, otherwise it is first converted to an
+ * unsigned char. The pd argument is not used. */
+Key makeFsmKeyChar( char c, ParseData *pd )
+{
+	return keyOps->isSigned ? Key( c ) : Key( (unsigned char)c );
+}
+
+/* Fill result[0..len-1] with keys made from the characters data[0..len-1].
+ * When keyOps->isSigned is set each character is used as a plain char,
+ * otherwise as an unsigned char. The pd argument is not used. */
+void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd )
+{
+	Key *dest = result;
+	if ( keyOps->isSigned ) {
+		/* Read the input as plain chars. */
+		for ( char *src = data; src != data + len; src++ )
+			*dest++ = Key( *src );
+	}
+	else {
+		/* Read the input as unsigned bytes. */
+		unsigned char *bytes = (unsigned char*) data;
+		for ( unsigned char *src = bytes; src != bytes + len; src++ )
+			*dest++ = Key( *src );
+	}
+}
+
+/* Like makeFsmKeyArray, except that the keys are inserted into the set
+ * result, so each key appears once and the original ordering is not kept.
+ * With caseInsensitive set, the other-case counterpart of every lower case
+ * or upper case key is inserted as well. The pd argument is not used. */
+void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
+		bool caseInsensitive, ParseData *pd )
+{
+	/* Decides whether input bytes are read as char or as unsigned char. */
+	const bool signedAlph = keyOps->isSigned;
+
+	for ( int si = 0; si < len; si++ ) {
+		Key key = signedAlph ?
+				Key( data[si] ) : Key( (unsigned char)data[si] );
+		result.insert( key );
+
+		if ( !caseInsensitive )
+			continue;
+
+		if ( key.isLower() )
+			result.insert( key.toUpper() );
+		else if ( key.isUpper() )
+			result.insert( key.toLower() );
+	}
+}
+
+/* Allocate a new machine and build it with rangeFsm over the whole alphabet,
+ * from keyOps->minKey to keyOps->maxKey. The caller receives the new
+ * machine. The pd argument is not used. */
+FsmAp *dotFsm( ParseData *pd )
+{
+	FsmAp *anyKey = new FsmAp();
+	anyKey->rangeFsm( keyOps->minKey, keyOps->maxKey );
+	return anyKey;
+}
+
+/* Allocate a new machine and build it with rangeStarFsm over the whole
+ * alphabet, from keyOps->minKey to keyOps->maxKey. The caller receives the
+ * new machine. The pd argument is not used. */
+FsmAp *dotStarFsm( ParseData *pd )
+{
+	FsmAp *anyKeyStar = new FsmAp();
+	anyKeyStar->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
+	return anyKeyStar;
+}
+
+/* Allocate a new machine and initialise it with rangeFsm( low, high ). */
+static FsmAp *builtinRange( Key low, Key high )
+{
+	FsmAp *fsm = new FsmAp();
+	fsm->rangeFsm( low, high );
+	return fsm;
+}
+
+/* Allocate a new machine and initialise it with concatFsm( c ). */
+static FsmAp *builtinSingle( Key c )
+{
+	FsmAp *fsm = new FsmAp();
+	fsm->concatFsm( c );
+	return fsm;
+}
+
+/* Make the machine for a builtin. Only BT_Extend depends on the signed
+ * nature of the alphabet type. Builtins that are the union of several ranges
+ * are minimized with minimizePartition2 before being returned. Returns null
+ * for a value that is not one of the handled builtins. */
+FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd )
+{
+	switch ( builtin ) {
+	case BT_Any:
+		/* All characters. */
+		return dotFsm( pd );
+
+	case BT_Ascii:
+		/* Ascii characters 0 to 127. */
+		return builtinRange( 0, 127 );
+
+	case BT_Extend:
+		/* Ascii extended characters. This is the full byte range, which
+		 * depends on whether the alphabet is signed. */
+		return keyOps->isSigned ?
+				builtinRange( -128, 127 ) : builtinRange( 0, 255 );
+
+	case BT_Alpha: {
+		/* Alpha [A-Za-z]. */
+		FsmAp *alpha = builtinRange( 'A', 'Z' );
+		alpha->unionOp( builtinRange( 'a', 'z' ) );
+		alpha->minimizePartition2();
+		return alpha;
+	}
+
+	case BT_Digit:
+		/* Digits [0-9]. */
+		return builtinRange( '0', '9' );
+
+	case BT_Alnum: {
+		/* Alpha numerics [0-9A-Za-z]. */
+		FsmAp *alnum = builtinRange( '0', '9' );
+		alnum->unionOp( builtinRange( 'A', 'Z' ) );
+		alnum->unionOp( builtinRange( 'a', 'z' ) );
+		alnum->minimizePartition2();
+		return alnum;
+	}
+
+	case BT_Lower:
+		/* Lower case characters. */
+		return builtinRange( 'a', 'z' );
+
+	case BT_Upper:
+		/* Upper case characters. */
+		return builtinRange( 'A', 'Z' );
+
+	case BT_Cntrl: {
+		/* Control characters: 0 to 31 and 127. */
+		FsmAp *cntrl = builtinRange( 0, 31 );
+		cntrl->unionOp( builtinSingle( 127 ) );
+		cntrl->minimizePartition2();
+		return cntrl;
+	}
+
+	case BT_Graph:
+		/* Graphical ascii characters [!-~]. */
+		return builtinRange( '!', '~' );
+
+	case BT_Print:
+		/* Printable characters. Same as graph except includes space. */
+		return builtinRange( ' ', '~' );
+
+	case BT_Punct: {
+		/* Punctuation. */
+		FsmAp *punct = builtinRange( '!', '/' );
+		punct->unionOp( builtinRange( ':', '@' ) );
+		punct->unionOp( builtinRange( '[', '`' ) );
+		punct->unionOp( builtinRange( '{', '~' ) );
+		punct->minimizePartition2();
+		return punct;
+	}
+
+	case BT_Space: {
+		/* Whitespace: [\t\v\f\n\r ]. */
+		FsmAp *space = builtinRange( '\t', '\r' );
+		space->unionOp( builtinSingle( ' ' ) );
+		space->minimizePartition2();
+		return space;
+	}
+
+	case BT_Xdigit: {
+		/* Hex digits [0-9A-Fa-f]. */
+		FsmAp *xdigit = builtinRange( '0', '9' );
+		xdigit->unionOp( builtinRange( 'A', 'F' ) );
+		xdigit->unionOp( builtinRange( 'a', 'f' ) );
+		xdigit->minimizePartition2();
+		return xdigit;
+	}
+
+	case BT_Lambda: {
+		FsmAp *lambda = new FsmAp();
+		lambda->lambdaFsm();
+		return lambda;
+	}
+
+	case BT_Empty: {
+		FsmAp *empty = new FsmAp();
+		empty->emptyFsm();
+		return empty;
+	}}
+
+	/* Not a handled builtin. */
+	return 0;
+}
+
+/* Return true when this name inst has a positive reference count, or when
+ * the same holds recursively for any name inst in childVect. */
+bool NameInst::anyRefsRec()
+{
+	if ( numRefs > 0 )
+		return true;
+
+	/* Search the children, stopping at the first hit. */
+	NameVect::Iter child = childVect;
+	while ( child.lte() ) {
+		if ( (*child)->anyRefsRec() )
+			return true;
+		child++;
+	}
+
+	return false;
+}
+
+/*
+ * ParseData
+ */
+
+/* Initialize the structure that will collect info during the parse of a
+ * machine. The file name, section name and section location are stored as
+ * given; counters start at zero except where noted below; expression and
+ * graph pointers start out null. The walk state used by the name scope
+ * functions (curNameInst, curNameChild, localNameScope) is not set here;
+ * initNameWalk sets the first two. */
+ParseData::ParseData( char *fileName, char *sectionName,
+ const InputLoc &sectionLoc )
+:
+ sectionGraph(0),
+ generatingSectionSubset(false),
+ nextPriorKey(0),
+ /* 0 is reserved for global error actions. */
+ nextLocalErrKey(1),
+ nextNameId(0),
+ alphTypeSet(false),
+ getKeyExpr(0),
+ accessExpr(0),
+ curStateExpr(0),
+ lowerNum(0),
+ upperNum(0),
+ fileName(fileName),
+ sectionName(sectionName),
+ sectionLoc(sectionLoc),
+ errorCount(0),
+ curActionOrd(0),
+ curPriorOrd(0),
+ rootName(0),
+ nextEpsilonResolvedLink(0),
+ /* Longest match ids start at 1. */
+ nextLongestMatchId(1),
+ lmRequiresErrorState(false)
+{
+ /* Initialize the dictionary of graphs. This is our symbol table. The
+ * initialization needs to be done on construction which happens at the
+ * beginning of a machine spec so any assignment operators can reference
+ * the builtins. */
+ initGraphDict();
+}
+
+/* Clean up the data collected during a parse. Only the action list is
+ * emptied explicitly in this destructor body. */
+ParseData::~ParseData()
+{
+ /* Delete all the nodes in the action list. Will cause all the
+ * string data that represents the actions to be deallocated. */
+ actionList.empty();
+}
+
+/* Create a new name instantiation as a child of the current name scope and
+ * return it. The new object takes the next name id and is always appended to
+ * the scope's child vector; when data is non-null it is also entered, as a
+ * multi-insert, into the scope's name map under that name. */
+NameInst *ParseData::addNameInst( const InputLoc &loc, char *data, bool isLabel )
+{
+	NameInst *scope = curNameInst;
+	NameInst *created = new NameInst( loc, scope, data, nextNameId++, isLabel );
+
+	scope->childVect.append( created );
+	if ( data != 0 )
+		scope->children.insertMulti( data, created );
+
+	return created;
+}
+
+/* Position the name tree walk at the root: the first child of rootName is
+ * the next scope to be entered. */
+void ParseData::initNameWalk()
+{
+	curNameChild = 0;
+	curNameInst = rootName;
+}
+
+/* Go into the next child scope, numScopes levels deep. The number of the
+ * child to enter is already set up in curNameChild; this is needed for the
+ * synchronous name tree and parse tree walk to work properly. The child
+ * number is reset on entry into a scope and advanced when the scope is
+ * popped. With isLocal set, the scope entered also becomes the local name
+ * scope. Returns the state to restore, which should be handed to the
+ * corresponding popNameScope. */
+NameFrame ParseData::enterNameScope( bool isLocal, int numScopes )
+{
+	/* Remember where the walk was. */
+	NameFrame saved;
+	saved.prevNameInst = curNameInst;
+	saved.prevNameChild = curNameChild;
+	saved.prevLocalScope = localNameScope;
+
+	/* Descend; after the first level the first child is always taken. */
+	for ( int levelsLeft = numScopes; levelsLeft > 0; levelsLeft-- ) {
+		curNameInst = curNameInst->childVect[curNameChild];
+		curNameChild = 0;
+	}
+
+	if ( isLocal )
+		localNameScope = curNameInst;
+
+	return saved;
+}
+
+/* Return from a child scope to its parent. The frame must be the one
+ * obtained from the corresponding call to enterNameScope. The saved scope
+ * and local scope are restored and the child number is advanced by one, so
+ * the next enterNameScope goes into the following child. */
+void ParseData::popNameScope( const NameFrame &frame )
+{
+	localNameScope = frame.prevLocalScope;
+	curNameInst = frame.prevNameInst;
+	curNameChild = frame.prevNameChild + 1;
+}
+
+/* Restore the walk state saved in frame exactly as it was. Unlike
+ * popNameScope the child number is not advanced, so the same child scope
+ * will be entered again. */
+void ParseData::resetNameScope( const NameFrame &frame )
+{
+	localNameScope = frame.prevLocalScope;
+	curNameInst = frame.prevNameInst;
+	curNameChild = frame.prevNameChild;
+}
+
+
+/* Count one use of every name referenced from the current name scope. A
+ * name whose use count has reached its reference count is no longer needed
+ * and its entry point is unset in graph. */
+void ParseData::unsetObsoleteEntries( FsmAp *graph )
+{
+	NameVect::Iter ref = curNameInst->referencedNames;
+	while ( ref.lte() ) {
+		NameInst *name = *ref;
+
+		/* Note the use, then drop the entry if that was the last one. */
+		if ( ++name->numUses == name->numRefs ) {
+			assert( graph->entryPoints.find( name->id ) != 0 );
+			graph->unsetEntry( name->id );
+		}
+
+		ref++;
+	}
+}
+
+/* Collect every name inst called data that can be found in refFrom or below
+ * it. The name tree is searched breadth first: each scope taken off the
+ * queue contributes all of its children named data, and then its children
+ * are queued for searching in turn (only the label children when
+ * recLabelsOnly is set). Children are queued whether or not the scope had a
+ * match. The result is empty when the name is not found. */
+NameSet ParseData::resolvePart( NameInst *refFrom, char *data, bool recLabelsOnly )
+{
+	NameSet matches;
+
+	/* Queue for the breadth-first search, loaded with the start node. */
+	NameInstList pending;
+	pending.append( refFrom );
+
+	while ( pending.length() > 0 ) {
+		NameInst *scope = pending.detachFirst();
+
+		/* Record all instances of the name in this scope. */
+		NameMapEl *first, *last;
+		if ( scope->children.findMulti( data, first, last ) ) {
+			for ( NameMapEl *el = first; el <= last; el++ )
+				matches.insert( el->value );
+		}
+
+		/* Queue the children that are to be searched. */
+		NameVect::Iter child = scope->childVect;
+		while ( child.lte() ) {
+			if ( !recLabelsOnly || (*child)->isLabel )
+				pending.append( *child );
+			child++;
+		}
+	}
+
+	return matches;
+}
+
+/* Resolve the components of nameRef from position namePos onwards, starting
+ * the search at refFrom, and add every name inst the full reference can
+ * denote to result. */
+void ParseData::resolveFrom( NameSet &result, NameInst *refFrom,
+		const NameRef &nameRef, int namePos )
+{
+	/* Find everything the current component can refer to. */
+	NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
+
+	const int nextPos = namePos + 1;
+	if ( nextPos >= nameRef.length() ) {
+		/* This was the last component, the part results are final. */
+		result.insert( partResult );
+		return;
+	}
+
+	/* There are more components: continue the search using each of the part
+	 * results as the base. */
+	for ( NameSet::Iter base = partResult; base.lte(); base++ )
+		resolveFrom( result, *base, nameRef, nextPos );
+}
+
+/* Write out a name reference with its components separated by "::". A null
+ * first component produces a leading "::". */
+ostream &operator<<( ostream &out, const NameRef &nameRef )
+{
+	int pos = 0;
+	if ( nameRef[0] == 0 ) {
+		out << "::";
+		pos = 1;
+	}
+
+	/* First printed component, then the rest with separators. */
+	out << nameRef[pos];
+	for ( pos += 1; pos < nameRef.length(); pos++ )
+		out << "::" << nameRef[pos];
+
+	return out;
+}
+
+/* Write the qualified name of a name inst: the names of its ancestors from
+ * the outermost one below the root down to the name itself, each preceded by
+ * "::". A null name is written as "<ANON>". */
+ostream &operator<<( ostream &out, const NameInst &nameInst )
+{
+	/* Count the ancestors, root included. */
+	int depth = 0;
+	for ( NameInst *up = nameInst.parent; up != 0; up = up->parent )
+		depth++;
+
+	/* Lay the ancestors out outermost first. */
+	NameInst **chain = new NameInst*[depth];
+	NameInst *up = nameInst.parent;
+	for ( int slot = depth; slot-- > 0; ) {
+		chain[slot] = up;
+		up = up->parent;
+	}
+
+	/* Slot 0 is the root, which is not written. */
+	for ( int slot = 1; slot < depth; slot++ ) {
+		const char *label = chain[slot]->name;
+		out << "::" << ( label != 0 ? label : "<ANON>" );
+	}
+
+	/* Write the name itself and clean up. */
+	const char *ownLabel = nameInst.name;
+	out << "::" << ( ownLabel != 0 ? ownLabel : "<ANON>" );
+	delete[] chain;
+	return out;
+}
+
+/* Compare class ordering name insts by input location: by line first, then
+ * by column. */
+struct CmpNameInstLoc
+{
+	static int compare( const NameInst *ni1, const NameInst *ni2 )
+	{
+		if ( ni1->loc.line != ni2->loc.line )
+			return ni1->loc.line < ni2->loc.line ? -1 : 1;
+		if ( ni1->loc.col != ni2->loc.col )
+			return ni1->loc.col < ni2->loc.col ? -1 : 1;
+		return 0;
+	}
+};
+
+void errorStateLabels( const NameSet &resolved )
+{
+ MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
+ mergeSort.sort( resolved.data, resolved.length() );
+ for ( NameSet::Iter res = resolved; res.lte(); res++ )
+ error((*res)->loc) << " -> " << **res << endl;
+}
+
+
+/* Resolve a state reference made at loc to a name inst. Unless the
+ * reference is strictly a root level one (null first component), it is first
+ * looked up in every scope that references action. If that finds nothing the
+ * lookup is repeated from the root of the name tree. The first match is
+ * returned; more than one match, or none at all, is reported as an error.
+ * Returns null when the reference cannot be resolved. */
+NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action )
+{
+	NameInst *found = 0;
+	const bool rootOnly = ( nameRef[0] == 0 );
+
+	/* Local search, in all the scopes referencing the action. */
+	if ( !rootOnly && action != 0 && action->actionRefs.length() > 0 ) {
+		NameSet localHits;
+		for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ )
+			resolveFrom( localHits, *actRef, nameRef, 0 );
+
+		if ( localHits.length() > 0 ) {
+			/* Take the first one. */
+			found = localHits[0];
+			if ( localHits.length() > 1 ) {
+				/* Complain about the multiple references. */
+				error(loc) << "state reference " << nameRef <<
+						" resolves to multiple entry points" << endl;
+				errorStateLabels( localHits );
+			}
+		}
+	}
+
+	/* Global search, skipping the null component of a root level name. */
+	if ( found == 0 ) {
+		NameSet globalHits;
+		const int fromPos = rootOnly ? 1 : 0;
+		resolveFrom( globalHits, rootName, nameRef, fromPos );
+
+		if ( globalHits.length() > 0 ) {
+			/* Take the first. */
+			found = globalHits[0];
+			if ( globalHits.length() > 1 ) {
+				/* Complain about the multiple references. */
+				error(loc) << "state reference " << nameRef <<
+						" resolves to multiple entry points" << endl;
+				errorStateLabels( globalHits );
+			}
+		}
+	}
+
+	/* If still not found then complain. */
+	if ( found == 0 )
+		error(loc) << "could not resolve state reference " << nameRef << endl;
+
+	return found;
+}
+
+/* Resolve the state references made by the items of an inline list, and
+ * recursively by the children of those items. For entry, goto, call and next
+ * items the reference is resolved with resolveStateRef (which reports an
+ * error and returns null on failure), the result is stored in the item's
+ * nameTarg, and a resolved target has its reference count incremented. A
+ * target lying inside a longest match construction is reported as an
+ * error. */
+void ParseData::resolveNameRefs( InlineList *inlineList, Action *action )
+{
+	for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+		switch ( item->type ) {
+			case InlineItem::Entry: case InlineItem::Goto:
+			case InlineItem::Call: case InlineItem::Next: {
+				/* Resolve, pass action for local search. */
+				NameInst *target = resolveStateRef( *item->nameRef, item->loc, action );
+
+				/* The target is null when the reference could not be
+				 * resolved. That has already been reported, and there is
+				 * nothing to inspect or count in that case. */
+				if ( target != 0 ) {
+					/* Check if the target goes into a longest match. */
+					NameInst *search = target->parent;
+					while ( search != 0 ) {
+						if ( search->isLongestMatch ) {
+							error(item->loc) << "cannot enter inside a longest "
+									"match construction as an entry point" << endl;
+							break;
+						}
+						search = search->parent;
+					}
+
+					/* Note the reference in the name. This will cause the
+					 * entry point to survive to the end of the graph
+					 * generating walk. */
+					target->numRefs += 1;
+				}
+
+				item->nameTarg = target;
+				break;
+			}
+			default:
+				break;
+		}
+
+		/* Some of the item types may have children. */
+		if ( item->children != 0 )
+			resolveNameRefs( item->children, action );
+	}
+}
+
+/* Resolve the label references found in the code of every action that is
+ * referenced from at least one place. */
+void ParseData::resolveActionNameRefs()
+{
+    for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
+        /* Unreferenced actions are skipped. */
+        if ( act->actionRefs.length() <= 0 )
+            continue;
+        resolveNameRefs( act->inlineList, act );
+    }
+}
+
+/* Record the subtree rooted at from in nameIndex, keyed by name id. */
+void ParseData::fillNameIndex( NameInst *from )
+{
+    /* The node itself. */
+    nameIndex[from->id] = from;
+
+    /* The implicit final state, when there is one. */
+    NameInst *implicitFinal = from->final;
+    if ( implicitFinal != 0 )
+        fillNameIndex( implicitFinal );
+
+    /* All children, in order of appearance. */
+    NameVect::Iter child = from->childVect;
+    while ( child.lte() ) {
+        fillNameIndex( *child );
+        child++;
+    }
+}
+
+/* Allocate the root of the name tree: no parent, no name, not a label. It
+ * consumes the next free name id. */
+void ParseData::makeRootName()
+{
+    const int rootId = nextNameId++;
+    rootName = new NameInst( InputLoc(), 0, 0, rootId, false );
+}
+
+/* Build the name tree and the id -> name instance index. When dictEl is
+ * given only that definition is walked, otherwise every instance is. */
+void ParseData::makeNameTree( GraphDictEl *dictEl )
+{
+    /* The walk starts at the root with no children visited yet. */
+    curNameInst = rootName;
+    curNameChild = 0;
+
+    if ( dictEl == 0 ) {
+        /* No start location: recurse on every instance. */
+        for ( GraphList::Iter inst = instanceList; inst.lte(); inst++ )
+            inst->value->makeNameTree( inst->loc, this );
+    }
+    else {
+        /* A start location has been specified. */
+        dictEl->value->makeNameTree( dictEl->loc, this );
+    }
+
+    /* nextNameId now gives the number of nodes in the tree. */
+    nameIndex = new NameInst*[nextNameId];
+    memset( nameIndex, 0, sizeof(NameInst*)*nextNameId );
+    fillNameIndex( rootName );
+}
+
+/* Wrap a builtin machine in the Expression -> Join -> JoinOrLm -> VarDef
+ * chain and register it in the graph dictionary under name. */
+void ParseData::createBuiltin( char *name, BuiltinMachine builtin )
+{
+    JoinOrLm *body = new JoinOrLm( new Join( new Expression( builtin ) ) );
+    VarDef *definition = new VarDef( name, body );
+    graphDict.insert( new GraphDictEl( name, definition ) );
+}
+
+/* Register the builtin machines in the graph dictionary. "null" and "zlen"
+ * both map to BT_Lambda. */
+void ParseData::initGraphDict( )
+{
+    static const struct {
+        const char *name;
+        BuiltinMachine builtin;
+    } builtins[] = {
+        { "any", BT_Any },
+        { "ascii", BT_Ascii },
+        { "extend", BT_Extend },
+        { "alpha", BT_Alpha },
+        { "digit", BT_Digit },
+        { "alnum", BT_Alnum },
+        { "lower", BT_Lower },
+        { "upper", BT_Upper },
+        { "cntrl", BT_Cntrl },
+        { "graph", BT_Graph },
+        { "print", BT_Print },
+        { "punct", BT_Punct },
+        { "space", BT_Space },
+        { "xdigit", BT_Xdigit },
+        { "null", BT_Lambda },
+        { "zlen", BT_Lambda },
+        { "empty", BT_Empty },
+    };
+
+    /* Insert in table order. */
+    const int count = sizeof(builtins) / sizeof(builtins[0]);
+    for ( int i = 0; i < count; i++ )
+        createBuiltin( const_cast<char*>( builtins[i].name ), builtins[i].builtin );
+}
+
+/* Set the alphabet type from a two-word type name. Returns false when no
+ * host type matches both words. alphTypeSet is raised in either case. */
+bool ParseData::setAlphType( char *s1, char *s2 )
+{
+    HostType *match = 0;
+    for ( int i = 0; match == 0 && i < hostLang->numHostTypes; i++ ) {
+        HostType *candidate = hostLang->hostTypes + i;
+
+        /* The first word must match, and the type must have a second word
+         * that matches too. */
+        if ( strcmp( s1, candidate->data1 ) != 0 )
+            continue;
+        if ( candidate->data2 == 0 )
+            continue;
+        if ( strcmp( s2, candidate->data2 ) != 0 )
+            continue;
+
+        match = candidate;
+    }
+
+    if ( match != 0 )
+        userAlphType = match;
+
+    alphTypeSet = true;
+    return match != 0;
+}
+
+/* Set the alphabet type from a single-word type name. Only host types
+ * without a second word are considered. Returns false when none matches.
+ * alphTypeSet is raised in either case. */
+bool ParseData::setAlphType( char *s1 )
+{
+    HostType *match = 0;
+    for ( int i = 0; match == 0 && i < hostLang->numHostTypes; i++ ) {
+        HostType *candidate = hostLang->hostTypes + i;
+        if ( strcmp( s1, candidate->data1 ) != 0 )
+            continue;
+        if ( candidate->data2 != 0 )
+            continue;
+
+        match = candidate;
+    }
+
+    if ( match != 0 )
+        userAlphType = match;
+
+    alphTypeSet = true;
+    return match != 0;
+}
+
+/* Set up the key operators object referenced by all fsms created: the
+ * alphabet type, optional explicit bounds, and the first condition key. */
+void ParseData::initKeyOps( )
+{
+    /* Use the user's alphabet type if one was set, else the host default. */
+    HostType *alphType = hostLang->defaultAlphType;
+    if ( alphTypeSet )
+        alphType = userAlphType;
+    thisKeyOps.setAlphType( alphType );
+
+    /* An explicit range overrides the bounds. */
+    const bool haveRange = ( lowerNum != 0 );
+    if ( haveRange ) {
+        thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
+        thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
+    }
+
+    /* Condition keys begin one past the largest alphabet key. */
+    thisCondData.nextCondKey = thisKeyOps.maxKey;
+    thisCondData.nextCondKey.increment();
+}
+
+/* Dump a name instance and, recursively, its children to cerr. One indent
+ * unit is written per nesting level. */
+void ParseData::printNameInst( NameInst *nameInst, int level )
+{
+    for ( int left = level; left > 0; left-- )
+        cerr << " ";
+
+    /* Anonymous instances have no name to show. */
+    const char *shown = nameInst->name;
+    if ( shown == 0 )
+        shown = "<ANON>";
+
+    cerr << shown <<
+            " id: " << nameInst->id <<
+            " refs: " << nameInst->numRefs << endl;
+
+    NameVect::Iter child = nameInst->childVect;
+    while ( child.lte() ) {
+        printNameInst( *child, level+1 );
+        child++;
+    }
+}
+
+/* Drop repeated actions from an action table, keeping the first occurrence
+ * of each. */
+void ParseData::removeDups( ActionTable &table )
+{
+    int keep = 0;
+    while ( keep < table.length() ) {
+        /* Scan everything after keep. Advance only when nothing was
+         * removed, as removal shifts the next element into scan. */
+        int scan = keep + 1;
+        while ( scan < table.length() ) {
+            if ( table[scan].value != table[keep].value )
+                scan += 1;
+            else
+                table.vremove( scan );
+        }
+        keep += 1;
+    }
+}
+
+/* Remove duplicates from the transition, to-state, from-state and eof
+ * action tables of every state. This should be called once all actions have
+ * been transferred to their final resting place. */
+void ParseData::removeActionDups( FsmAp *graph )
+{
+    for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+        /* Tables on the outgoing transitions. */
+        for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) {
+            removeDups( tr->actionTable );
+        }
+
+        /* Tables on the state itself. */
+        removeDups( st->toStateActionTable );
+        removeDups( st->fromStateActionTable );
+        removeDups( st->eofActionTable );
+    }
+}
+
+/* Create an action located at line 1, column 1, mark it as referenced from
+ * the root name and append it to the action list. */
+Action *ParseData::newAction( char *name, InlineList *inlineList )
+{
+    InputLoc startLoc;
+    startLoc.line = 1;
+    startLoc.col = 1;
+
+    Action *created = new Action( startLoc, name, inlineList );
+    created->actionRefs.append( rootName );
+    actionList.append( created );
+    return created;
+}
+
+/* Create the four built-in longest-match actions and reserve an ordering
+ * number for each. Does nothing when lmList is empty. */
+void ParseData::initLongestMatchData()
+{
+    if ( lmList.length() <= 0 )
+        return;
+
+    /* initts: resets the token start. */
+    InlineList *initTsCode = new InlineList;
+    initTsCode->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) );
+    initTokStart = newAction( "initts", initTsCode );
+    initTokStart->isLmAction = true;
+
+    /* initact: gives act a default value. */
+    InlineList *initActCode = new InlineList;
+    initActCode->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) );
+    initActId = newAction( "initact", initActCode );
+    initActId->isLmAction = true;
+
+    /* tokstart: sets tokstart. */
+    InlineList *setTsCode = new InlineList;
+    setTsCode->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) );
+    setTokStart = newAction( "tokstart", setTsCode );
+    setTokStart->isLmAction = true;
+
+    /* tokend: sets tokend. */
+    InlineList *setTeCode = new InlineList;
+    setTeCode->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) );
+    setTokEnd = newAction( "tokend", setTeCode );
+    setTokEnd->isLmAction = true;
+
+    /* The actions also need an ordering: ahead of all user action
+     * embeddings. */
+    initTokStartOrd = curActionOrd++;
+    initActIdOrd = curActionOrd++;
+    setTokStartOrd = curActionOrd++;
+    setTokEndOrd = curActionOrd++;
+}
+
+/* After building the graph, do some extra processing to ensure the runtime
+ * data of the longest match operators is consistent: entry points and the
+ * targets of call-bearing transitions get the initTokStart to-state action.
+ * Does nothing when lmList is empty. */
+void ParseData::setLongestMatchData( FsmAp *graph )
+{
+    if ( lmList.length() <= 0 )
+        return;
+
+    /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry)
+     * init the tokstart. Duplicates were removed earlier, so guard against
+     * adding the action twice. */
+    for ( EntryMap::Iter entry = graph->entryPoints; entry.lte(); entry++ ) {
+        ActionTable &toState = entry->value->toStateActionTable;
+        if ( toState.hasAction( initTokStart ) )
+            continue;
+        toState.setAction( initTokStartOrd, initTokStart );
+    }
+
+    /* Collect the states targeted by transitions whose actions contain
+     * calls. These states will be targeted by fret statements. */
+    StateSet returnTargets;
+    for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+        for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) {
+            if ( tr->toState == 0 )
+                continue;
+            for ( ActionTable::Iter act = tr->actionTable; act.lte(); act++ ) {
+                if ( act->value->anyCall )
+                    returnTargets.insert( tr->toState );
+            }
+        }
+    }
+
+    /* Init tokstart upon entering the collected states, with the same
+     * duplicate guard as above. */
+    for ( StateSet::Iter target = returnTargets; target.lte(); target++ ) {
+        ActionTable &toState = (*target)->toStateActionTable;
+        if ( toState.hasAction( initTokStart ) )
+            continue;
+        toState.setAction( initTokStartOrd, initTokStart );
+    }
+}
+
+/* Make the graph from a graph dict node and run the post-construction
+ * pipeline on it: deterministic entry points, error action transfer,
+ * duplicate action removal, unreachable state removal, clearing of action
+ * ordering keys and priorities, optional minimization and transition
+ * compression. The steps are order dependent. Returns the finished graph. */
+FsmAp *ParseData::makeInstance( GraphDictEl *gdNode )
+{
+ /* Build the graph from a walk of the parse tree. */
+ FsmAp *graph = gdNode->value->walk( this );
+
+ /* Resolve any labels that point to multiple states. Any labels that are
+ * still around are referenced only by gotos and calls and they need to be
+ * made into deterministic entry points. */
+ graph->deterministicEntry();
+
+ /*
+ * All state construction is now complete.
+ */
+
+ /* Transfer global error actions. */
+ for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
+ graph->transferErrorActions( state, 0 );
+
+ /* Actions are in their final tables now, so duplicates can be dropped. */
+ removeActionDups( graph );
+
+ /* Remove unreachable states. There should be no dead end states. The
+ * subtract and intersection operators are the only places where they may
+ * be created and those operators clean them up. */
+ graph->removeUnreachableStates();
+
+ /* No more fsm operations are to be done. Action ordering numbers are
+ * no longer of use and will just hinder minimization. Clear them. */
+ graph->nullActionKeys();
+
+ /* Transition priorities are no longer of use. We can clear them
+ * because they will just hinder minimization as well. Clear them. */
+ graph->clearAllPriorities();
+
+ if ( minimizeOpt != MinimizeNone ) {
+ /* Minimize here even if we minimized at every op. Now that function
+ * keys have been cleared we may get a more minimal fsm. The switch has
+ * no default: any other minimizeLevel value does no minimization. */
+ switch ( minimizeLevel ) {
+ case MinimizeApprox:
+ graph->minimizeApproximate();
+ break;
+ case MinimizeStable:
+ graph->minimizeStable();
+ break;
+ case MinimizePartition1:
+ graph->minimizePartition1();
+ break;
+ case MinimizePartition2:
+ graph->minimizePartition2();
+ break;
+ }
+ }
+
+ /* Compression is done last, with or without minimization. */
+ graph->compressTransitions();
+
+ return graph;
+}
+
+/* Debug dump to cerr: the name tree below the root, then the name index. */
+void ParseData::printNameTree()
+{
+    /* Print the name instance map. */
+    NameVect::Iter top = rootName->childVect;
+    while ( top.lte() ) {
+        printNameInst( *top, 0 );
+        top++;
+    }
+
+    cerr << "name index:" << endl;
+
+    /* Show that the name index is correct. */
+    int ni = 0;
+    while ( ni < nextNameId ) {
+        cerr << ni << ": ";
+        char *entryName = nameIndex[ni]->name;
+        if ( entryName != 0 )
+            cerr << entryName << endl;
+        else
+            cerr << "<ANON>" << endl;
+        ni += 1;
+    }
+}
+
+/* Build only the machine defined by gdNode. Name references in action code
+ * are deliberately not resolved and generatingSectionSubset is raised to
+ * record that. */
+FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode )
+{
+    /* Build the name tree and supporting data structures. */
+    makeNameTree( gdNode );
+
+    /* Resolve name references from gdNode. */
+    initNameWalk();
+    gdNode->value->resolveNameRefs( this );
+
+    /* Do not resolve action references. Since we are not building the
+     * entire graph there's a good chance that many name references will
+     * fail. This is okay since generating part of the graph is usually only
+     * done when inspecting the compiled machine.
+     *
+     * Flag this case so that the XML code generator is aware that we
+     * haven't looked up name references in actions. It can then avoid
+     * segfaulting. */
+    generatingSectionSubset = true;
+
+    /* Just building the specified graph. */
+    initNameWalk();
+    return makeInstance( gdNode );
+}
+
+/* Build the whole section: main is always instantiated, other instances
+ * only when referenced, and the others are then merged into main. Returns
+ * the main graph. */
+FsmAp *ParseData::makeAll()
+{
+    /* Build the name tree and supporting data structures. */
+    makeNameTree( 0 );
+
+    /* Resolve name references in the tree. */
+    initNameWalk();
+    for ( GraphList::Iter inst = instanceList; inst.lte(); inst++ )
+        inst->value->resolveNameRefs( this );
+
+    /* Resolve action code name references. */
+    resolveActionNameRefs();
+
+    FsmAp *mainGraph = 0;
+    FsmAp **graphs = new FsmAp*[instanceList.length()];
+    int numOthers = 0;
+
+    /* Make all the instantiations, we know that main exists in this list. */
+    initNameWalk();
+    for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
+        /* Main graph is always instantiated. */
+        if ( strcmp( glel->key, machineMain ) == 0 ) {
+            mainGraph = makeInstance( glel );
+            continue;
+        }
+
+        /* An instance that is never referenced is not built, but the walk
+         * still needs to step over its name tree item. */
+        NameInst *scope = nextNameScope();
+        if ( !scope->anyRefsRec() ) {
+            NameFrame skipped = enterNameScope( true, 1 );
+            popNameScope( skipped );
+            continue;
+        }
+
+        graphs[numOthers++] = makeInstance( glel );
+    }
+
+    /* Add all the other graphs into main. */
+    if ( numOthers > 0 )
+        mainGraph->globOp( graphs, numOthers );
+
+    delete[] graphs;
+    return mainGraph;
+}
+
+/* Scan an inline list for calls and set action->anyCall when one is found.
+ * The scan descends into longest-match items and into child lists, always
+ * attributing the result to the same action. */
+void ParseData::analyzeAction( Action *action, InlineList *inlineList )
+{
+    /* FIXME: Actions used as conditions should be very constrained. */
+    for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+        switch ( item->type ) {
+            case InlineItem::Call:
+            case InlineItem::CallExpr:
+                action->anyCall = true;
+                break;
+
+            case InlineItem::LmSwitch: {
+                /* Need to recurse into every part of the longest match. */
+                LongestMatch *lm = item->longestMatch;
+                for ( LmPartList::Iter part = *lm->longestMatchList; part.lte(); part++ ) {
+                    if ( part->action != 0 )
+                        analyzeAction( action, part->action->inlineList );
+                }
+                break;
+            }
+
+            case InlineItem::LmOnLast:
+            case InlineItem::LmOnNext:
+            case InlineItem::LmOnLagBehind: {
+                /* Need to recurse into the single part referenced. */
+                LongestMatchPart *part = item->longestMatchPart;
+                if ( part->action != 0 )
+                    analyzeAction( action, part->action->inlineList );
+                break;
+            }
+
+            default:
+                break;
+        }
+
+        if ( item->children != 0 )
+            analyzeAction( action, item->children );
+    }
+}
+
+
+/* Check actions for bad uses of fsm directives. We don't go inside longest
+ * match items in actions created by ragel, since we just want the user
+ * actions. Only actions with eof references are checked. */
+void ParseData::checkInlineList( Action *act, InlineList *inlineList )
+{
+    for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+        /* EOF checks. */
+        if ( act->numEofRefs > 0 ) {
+            /* Stays null when the item is acceptable in EOF action code. */
+            const char *complaint = 0;
+            switch ( item->type ) {
+                case InlineItem::PChar:
+                    complaint = "pointer to current element does not exist in "
+                            "EOF action code";
+                    break;
+                case InlineItem::Char:
+                    complaint = "current element does not exist in "
+                            "EOF action code";
+                    break;
+                case InlineItem::Hold:
+                case InlineItem::Exec:
+                    complaint = "changing the current element not possible in "
+                            "EOF action code";
+                    break;
+                case InlineItem::Goto: case InlineItem::Call:
+                case InlineItem::Next: case InlineItem::GotoExpr:
+                case InlineItem::CallExpr: case InlineItem::NextExpr:
+                case InlineItem::Ret:
+                    complaint = "changing the current state not possible in "
+                            "EOF action code";
+                    break;
+                default:
+                    break;
+            }
+
+            if ( complaint != 0 )
+                error(item->loc) << complaint << endl;
+        }
+
+        /* Recurse. */
+        if ( item->children != 0 )
+            checkInlineList( act, item->children );
+    }
+}
+
+/* Validate one action: report user actions containing calls that are
+ * embedded within a longest match machine, then check the action's code. */
+void ParseData::checkAction( Action *action )
+{
+    const bool userActionWithCall =
+            !action->isLmAction && action->numRefs() > 0 && action->anyCall;
+
+    if ( userActionWithCall ) {
+        for ( ActionRefs::Iter ar = action->actionRefs; ar.lte(); ar++ ) {
+            /* Climb from the referencing name to the root. One report per
+             * reference is enough. */
+            for ( NameInst *scope = *ar; scope != 0; scope = scope->parent ) {
+                if ( scope->isLongestMatch ) {
+                    error(action->loc) << "within a scanner, fcall is permitted"
+                            " only in pattern actions" << endl;
+                    break;
+                }
+            }
+        }
+    }
+
+    checkInlineList( action, action->inlineList );
+}
+
+
+/* Analyze every action's code, count how each action is referenced in the
+ * graph (transition, to-state, from-state, eof, condition), then check the
+ * actions for bad usage of directives. */
+void ParseData::analyzeGraph( FsmAp *graph )
+{
+    for ( ActionList::Iter action = actionList; action.lte(); action++ )
+        analyzeAction( action, action->inlineList );
+
+    for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
+        /* Actions on the outgoing transitions. */
+        for ( TransList::Iter tr = state->outList; tr.lte(); tr++ ) {
+            for ( ActionTable::Iter el = tr->actionTable; el.lte(); el++ )
+                el->value->numTransRefs += 1;
+        }
+
+        /* Actions on the state itself. */
+        for ( ActionTable::Iter el = state->toStateActionTable; el.lte(); el++ )
+            el->value->numToStateRefs += 1;
+
+        for ( ActionTable::Iter el = state->fromStateActionTable; el.lte(); el++ )
+            el->value->numFromStateRefs += 1;
+
+        for ( ActionTable::Iter el = state->eofActionTable; el.lte(); el++ )
+            el->value->numEofRefs += 1;
+
+        /* Actions used as conditions. */
+        for ( StateCondList::Iter cond = state->stateCondList; cond.lte(); cond++ ) {
+            for ( CondSet::Iter member = cond->condSpace->condSet; member.lte(); member++ )
+                (*member)->numCondRefs += 1;
+        }
+    }
+
+    /* Checks for bad usage of directives in action code. */
+    for ( ActionList::Iter action = actionList; action.lte(); action++ )
+        checkAction( action );
+}
+
+/* Build and analyze the section graph. With a null graphDictEl the whole
+ * section is built, otherwise only the given machine. Analysis is skipped
+ * when errors have been reported. */
+void ParseData::prepareMachineGen( GraphDictEl *graphDictEl )
+{
+    beginProcessing();
+    initKeyOps();
+    makeRootName();
+    initLongestMatchData();
+
+    /* Make the graph, do minimization. */
+    sectionGraph = ( graphDictEl != 0 ) ?
+            makeSpecific( graphDictEl ) : makeAll();
+
+    /* If any errors have occurred in the input file then stop here. */
+    if ( gblErrorCount <= 0 ) {
+        analyzeGraph( sectionGraph );
+
+        /* Depends on the graph analysis. */
+        setLongestMatchData( sectionGraph );
+    }
+}
+
+/* Write the section graph as XML to out. When printStatistics is set the
+ * section name and state count also go to cerr. */
+void ParseData::generateXML( ostream &out )
+{
+    beginProcessing();
+
+    /* Make the generator and write out with it. */
+    XMLCodeGen codeGen( sectionName, this, sectionGraph, out );
+    codeGen.writeXML();
+
+    if ( !printStatistics )
+        return;
+
+    cerr << "fsm name : " << sectionName << endl;
+    cerr << "num states: " << sectionGraph->stateList.length() << endl;
+    cerr << endl;
+}
+
+/* Send the eof token to every parser in parserDict. */
+void terminateAllParsers( )
+{
+    /* FIXME: a proper token is needed here. Suppose we should use the
+     * location of EOF in the last file that the parser was referenced in. */
+    InputLoc eofLoc;
+    eofLoc.fileName = "<EOF>";
+    eofLoc.line = 0;
+    eofLoc.col = 0;
+
+    ParserDict::Iter entry = parserDict;
+    while ( entry.lte() ) {
+        entry->value->token( eofLoc, _eof, 0, 0 );
+        entry++;
+    }
+}
+
+void checkMachines( )
+{
+ for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
+ ParseData *pd = parser->value->pd;
+ if ( pd->instanceList.length() > 0 ) {
+ /* There must be a main graph defined. */
+ /* No machine name. Need to have a main. Make sure it was given. */
+ GraphDictEl *mainEl = pd->graphDict.find( machineMain );
+ if ( mainEl == 0 ) {
+ error(pd->sectionLoc) << "main graph not defined in \"" <<
+ pd->sectionName << "\"" << endl;
+ }
+ }
+ }
+}
+
+/* Write the lang="..." attribute for the current host language. The value
+ * is empty when hostLangType is none of the handled cases. */
+void writeLanguage( std::ostream &out )
+{
+    const char *langName = "";
+    switch ( hostLangType ) {
+        case CCode: langName = "C"; break;
+        case DCode: langName = "D"; break;
+        case JavaCode: langName = "Java"; break;
+    }
+
+    out << " lang=\"" << langName << "\"";
+}
+
+/* Prepare the machines and write them to out as a <ragel> XML document,
+ * followed by hostData. Without a machine spec or machine name every section
+ * that has instances is emitted. Otherwise only the first section/machine
+ * matching machineSpec and/or machineName is emitted. Nothing is written
+ * when gblErrorCount is non-zero after preparation.
+ * NOTE(review): inputFileName is written into the filename attribute as-is,
+ * with no escaping done in this function. */
+void writeMachines( std::ostream &out, std::string hostData, char *inputFileName )
+{
+ if ( machineSpec == 0 && machineName == 0 ) {
+ /* No machine spec or machine name given. Generate everything. */
+ for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
+ ParseData *pd = parser->value->pd;
+ if ( pd->instanceList.length() > 0 )
+ pd->prepareMachineGen( 0 );
+ }
+
+ /* All sections are prepared before any output is written, so an error
+ * in any one of them suppresses the whole document. */
+ if ( gblErrorCount == 0 ) {
+ out << "<ragel filename=\"" << inputFileName << "\"";
+ writeLanguage( out );
+ out << ">\n";
+ for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
+ ParseData *pd = parser->value->pd;
+ if ( pd->instanceList.length() > 0 )
+ pd->generateXML( out );
+ }
+ out << hostData;
+ out << "</ragel>\n";
+ }
+ }
+ else if ( parserDict.length() > 0 ) {
+ /* There is either a machine spec or machine name given. */
+ ParseData *parseData = 0;
+ GraphDictEl *graphDictEl = 0;
+
+ /* Traverse the sections, break out when we find a section/machine
+ * that matches the one specified. */
+ for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
+ ParseData *checkPd = parser->value->pd;
+ if ( machineSpec == 0 || strcmp( checkPd->sectionName, machineSpec ) == 0 ) {
+ /* Stays null when no machine name was given; prepareMachineGen then
+ * builds the whole section. */
+ GraphDictEl *checkGdEl = 0;
+ if ( machineName == 0 || (checkGdEl =
+ checkPd->graphDict.find( machineName )) != 0 )
+ {
+ /* Have a machine spec and/or machine name that matches
+ * the -M/-S options. */
+ parseData = checkPd;
+ graphDictEl = checkGdEl;
+ break;
+ }
+ }
+ }
+
+ if ( parseData == 0 )
+ error() << "could not locate machine specified with -S and/or -M" << endl;
+ else {
+ /* Section/Machine to emit was found. Prepare and emit it. */
+ parseData->prepareMachineGen( graphDictEl );
+ if ( gblErrorCount == 0 ) {
+ out << "<ragel filename=\"" << inputFileName << "\"";
+ writeLanguage( out );
+ out << ">\n";
+ parseData->generateXML( out );
+ out << hostData;
+ out << "</ragel>\n";
+ }
+ }
+ }
+}
diff --git a/ragel/parsedata.h b/ragel/parsedata.h
new file mode 100644
index 0000000..a856257
--- /dev/null
+++ b/ragel/parsedata.h
@@ -0,0 +1,463 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _PARSEDATA_H
+#define _PARSEDATA_H
+
+#include <iostream>
+#include <limits.h>
+#include "avlmap.h"
+#include "bstmap.h"
+#include "vector.h"
+#include "dlist.h"
+#include "fsmgraph.h"
+#include "compare.h"
+#include "vector.h"
+#include "common.h"
+#include "parsetree.h"
+
+/* Forwards. */
+using std::ostream;
+
+/* Nodes in the tree that use this action. */
+typedef Vector<NameInst*> ActionRefs;
+
+/* Element in list of actions. Contains the code to execute (as an inline
+ * list), where it was defined, and the reference counts gathered from the
+ * final machine. It is an element of both a DList and an AvlTree. */
+struct Action
+:
+    public DListEl<Action>,
+    public AvlTreeEl<Action>
+{
+public:
+
+    /* All reference counts start at zero, the flags start false and the
+     * action id starts at -1. */
+    Action( const InputLoc &loc, char *name, InlineList *inlineList )
+    :
+        loc(loc),
+        name(name),
+        inlineList(inlineList),
+        actionId(-1),
+        numTransRefs(0),
+        numToStateRefs(0),
+        numFromStateRefs(0),
+        numEofRefs(0),
+        numCondRefs(0),
+        anyCall(false),
+        isLmAction(false)
+    {
+    }
+
+    /* Key for action dictionary. */
+    char *getKey() const { return name; }
+
+    /* Data collected during parse. */
+    InputLoc loc;
+    char *name;
+    InlineList *inlineList;
+    int actionId;
+
+    /* Write the action's name, or line:col of its definition when the
+     * action has no name. */
+    void actionName( ostream &out )
+    {
+        if ( name != 0 )
+            out << name;
+        else
+            out << loc.line << ":" << loc.col;
+    }
+
+    /* Places in the input text that reference the action. */
+    ActionRefs actionRefs;
+
+    /* Number of references in the final machine: the sum of the transition,
+     * to-state, from-state and eof counts. Condition references are not
+     * included. Returns the count itself, so the return type is int; a bool
+     * return would collapse it to 0/1. */
+    int numRefs()
+        { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; }
+    int numTransRefs;
+    int numToStateRefs;
+    int numFromStateRefs;
+    int numEofRefs;
+    int numCondRefs;
+
+    /* Set when the action's code contains a call. */
+    bool anyCall;
+
+    /* Set on the longest-match actions that ragel creates itself. */
+    bool isLmAction;
+};
+
+/* A list of actions. */
+typedef DList<Action> ActionList;
+typedef AvlTree<Action, char *, CmpStr> ActionDict;
+
+/* Structure for reverse action mapping: pairs a name with an input
+ * location. */
+struct RevActionMapEl
+{
+ char *name;
+ InputLoc location;
+};
+
+struct VarDef;
+struct Join;
+struct Expression;
+struct Term;
+struct FactorWithAug;
+struct FactorWithLabel;
+struct FactorWithRep;
+struct FactorWithNeg;
+struct Factor;
+struct Literal;
+struct Range;
+struct RegExpr;
+struct ReItem;
+struct ReOrBlock;
+struct ReOrItem;
+struct LongestMatch;
+typedef DList<LongestMatch> LmList;
+
+/* Graph dictionary element. Maps a machine name (key) to its definition
+ * (value). It is an element of both an AvlTree and a DList. */
+struct GraphDictEl
+:
+ public AvlTreeEl<GraphDictEl>,
+ public DListEl<GraphDictEl>
+{
+ /* Both constructors leave isInstance false and do not set loc. */
+ GraphDictEl( char *k )
+ : key(k), value(0), isInstance(false) { }
+ GraphDictEl( char *k, VarDef *value )
+ : key(k), value(value), isInstance(false) { }
+
+ /* Key for the graph dictionary. */
+ const char *getKey() { return key; }
+
+ char *key;
+ VarDef *value;
+ bool isInstance;
+
+ /* Location info of graph definition. Points to variable name of assignment. */
+ InputLoc loc;
+};
+
+typedef AvlTree<GraphDictEl, char*, CmpStr> GraphDict;
+typedef DList<GraphDictEl> GraphList;
+
+/* Priority name dictionary. */
+typedef AvlMapEl<char*, int> PriorDictEl;
+typedef AvlMap<char*, int, CmpStr> PriorDict;
+
+/* Local error name dictionary. */
+typedef AvlMapEl<char*, int> LocalErrDictEl;
+typedef AvlMap<char*, int, CmpStr> LocalErrDict;
+
+/* Tree of instantiated names. */
+typedef BstMapEl<char*, NameInst*> NameMapEl;
+typedef BstMap<char*, NameInst*, CmpStr> NameMap;
+typedef Vector<NameInst*> NameVect;
+typedef BstSet<NameInst*> NameSet;
+
+/* Node in the tree of instantiated names. */
+struct NameInst
+{
+ /* Counters start at zero, isLongestMatch false and start/final null. The
+ * prev and next pointers are not initialized here. */
+ NameInst( const InputLoc &loc, NameInst *parent, char *name, int id, bool isLabel ) :
+ loc(loc), parent(parent), name(name), id(id), isLabel(isLabel),
+ isLongestMatch(false), numRefs(0), numUses(0), start(0), final(0) {}
+
+ InputLoc loc;
+
+ /* Keep parent pointers in the name tree to retrieve
+ * fully qualified names. */
+ NameInst *parent;
+
+ /* Null for anonymous names. The id is the slot in ParseData::nameIndex. */
+ char *name;
+ int id;
+ bool isLabel;
+ bool isLongestMatch;
+
+ int numRefs;
+ int numUses;
+
+ /* Names underneath us, excludes anonymous names. */
+ NameMap children;
+
+ /* All names underneath us in order of appearance. */
+ NameVect childVect;
+
+ /* Join scopes need an implicit "final" target. */
+ NameInst *start, *final;
+
+ /* During a fsm generation walk, lists the names that are referenced by
+ * epsilon operations in the current scope. After the link is made by the
+ * epsilon reference and the join operation is complete, the label can
+ * have its refcount decremented. Once there are no more references the
+ * entry point can be removed from the fsm returned. */
+ NameVect referencedNames;
+
+ /* Pointers for the name search queue. */
+ NameInst *prev, *next;
+
+ /* Check if this name inst or any name inst below is referenced. */
+ bool anyRefsRec();
+};
+
+typedef DList<NameInst> NameInstList;
+
+/* Stack frame used in walking the name tree. Returned by
+ * ParseData::enterNameScope and handed back to popNameScope or
+ * resetNameScope. */
+struct NameFrame
+{
+ NameInst *prevNameInst;
+ int prevNameChild;
+ NameInst *prevLocalScope;
+};
+
+/* Class to collect information about the machine during the
+ * parse of input. It also holds the routines that turn the collected data
+ * into the section graph and write it out as XML. */
+struct ParseData
+{
+ /* Create a new parse data object. This is done at the beginning of every
+ * fsm specification. */
+ ParseData( char *fileName, char *sectionName, const InputLoc &sectionLoc );
+ ~ParseData();
+
+ /*
+ * Setting up the graph dict.
+ */
+
+ /* Initialize a graph dict with the basic fsms. */
+ void initGraphDict();
+ void createBuiltin( char *name, BuiltinMachine builtin );
+
+ /* Make a name id in the current name instantiation scope if it is not
+ * already there. */
+ NameInst *addNameInst( const InputLoc &loc, char *data, bool isLabel );
+ void makeRootName();
+ void makeNameTree( GraphDictEl *gdNode );
+ void fillNameIndex( NameInst *from );
+ void printNameTree();
+
+ /* Increments the usage count on entry names. Names that are no longer
+ * needed will have their entry points unset. */
+ void unsetObsoleteEntries( FsmAp *graph );
+
+ /* Resolve name references in action code and epsilon transitions. */
+ NameSet resolvePart( NameInst *refFrom, char *data, bool recLabelsOnly );
+ void resolveFrom( NameSet &result, NameInst *refFrom,
+ const NameRef &nameRef, int namePos );
+ NameInst *resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action );
+ void resolveNameRefs( InlineList *inlineList, Action *action );
+ void resolveActionNameRefs();
+
+ /* Set the alphabet type. If the types are not valid returns false. */
+ bool setAlphType( char *s1, char *s2 );
+ bool setAlphType( char *s1 );
+
+ /* Unique actions. */
+ void removeDups( ActionTable &actionTable );
+ void removeActionDups( FsmAp *graph );
+
+ /* Dumping the name instantiation tree. */
+ void printNameInst( NameInst *nameInst, int level );
+
+ /* Make the graph from a graph dict node. Does minimization. */
+ FsmAp *makeInstance( GraphDictEl *gdNode );
+ FsmAp *makeSpecific( GraphDictEl *gdNode );
+ FsmAp *makeAll();
+
+ /* Checking the contents of actions. */
+ void checkAction( Action *action );
+ void checkInlineList( Action *act, InlineList *inlineList );
+
+ void analyzeAction( Action *action, InlineList *inlineList );
+ void analyzeGraph( FsmAp *graph );
+
+ /* Building the section graph and writing it out. sectionGraph is set by
+ * prepareMachineGen; generatingSectionSubset is raised by makeSpecific. */
+ void prepareMachineGen( GraphDictEl *graphDictEl );
+ void generateXML( ostream &out );
+ FsmAp *sectionGraph;
+ bool generatingSectionSubset;
+
+ void initKeyOps();
+
+ /*
+ * Data collected during the parse.
+ */
+
+ /* Dictionary of graphs. Both instances and non-instances go here. */
+ GraphDict graphDict;
+
+ /* The list of instances. */
+ GraphList instanceList;
+
+ /* Dictionary of actions. Lets actions be defined and then referenced. */
+ ActionDict actionDict;
+
+ /* Dictionary of named priorities. */
+ PriorDict priorDict;
+
+ /* Dictionary of named local errors. */
+ LocalErrDict localErrDict;
+
+ /* List of actions. Will be pasted into a switch statement. */
+ ActionList actionList;
+
+ /* The id of the next priority name and label. */
+ int nextPriorKey, nextLocalErrKey, nextNameId;
+
+ /* The default priority number key for a machine. This is active during
+ * the parse of the rhs of a machine assignment. */
+ int curDefPriorKey;
+
+ int curDefLocalErrKey;
+
+ /* Alphabet type. alphTypeSet is raised by every setAlphType call, also
+ * when the requested type was not valid. */
+ HostType *userAlphType;
+ bool alphTypeSet;
+
+ /* Element type and get key expression. */
+ InlineList *getKeyExpr;
+ InlineList *accessExpr;
+ InlineList *curStateExpr;
+
+ /* The alphabet range. */
+ char *lowerNum, *upperNum;
+ Key lowKey, highKey;
+ InputLoc rangeLowLoc, rangeHighLoc;
+
+ /* The name of the file the fsm is from, and the spec name. */
+ char *fileName;
+ char *sectionName;
+ InputLoc sectionLoc;
+
+ /* Number of errors encountered parsing the fsm spec. */
+ int errorCount;
+
+ /* Counting the action and priority ordering. */
+ int curActionOrd;
+ int curPriorOrd;
+
+ /* Root of the name tree. */
+ NameInst *rootName;
+ NameInst *curNameInst;
+ int curNameChild;
+
+ /* The place where resolved epsilon transitions go. These cannot go into
+ * the parse tree because a single epsilon op can resolve more than once
+ * to different nameInsts if the machine it's in is used more than once. */
+ NameVect epsilonResolvedLinks;
+ int nextEpsilonResolvedLink;
+
+ /* Root of the name tree used for doing local name searches. */
+ NameInst *localNameScope;
+
+ void setLmInRetLoc( InlineList *inlineList );
+ void initLongestMatchData();
+ void setLongestMatchData( FsmAp *graph );
+ void initNameWalk();
+ /* The child of the current name inst that the walk will visit next. */
+ NameInst *nextNameScope() { return curNameInst->childVect[curNameChild]; }
+ NameFrame enterNameScope( bool isLocal, int numScopes );
+ void popNameScope( const NameFrame &frame );
+ void resetNameScope( const NameFrame &frame );
+
+ /* Map name ids to name inst pointers. */
+ NameInst **nameIndex;
+
+ /* Counter for assigning ids to longest match items. */
+ int nextLongestMatchId;
+ bool lmRequiresErrorState;
+
+ /* List of all longest match parse tree items. */
+ LmList lmList;
+
+ Action *newAction( char *name, InlineList *inlineList );
+
+ /* The longest match actions created by initLongestMatchData, each with
+ * its action ordering number. */
+ Action *initTokStart;
+ int initTokStartOrd;
+
+ Action *setTokStart;
+ int setTokStartOrd;
+
+ Action *initActId;
+ int initActIdOrd;
+
+ Action *setTokEnd;
+ int setTokEndOrd;
+
+ /* Point the global condData and keyOps at this section's objects. */
+ void beginProcessing()
+ {
+ ::condData = &thisCondData;
+ ::keyOps = &thisKeyOps;
+ }
+
+ CondData thisCondData;
+ KeyOps thisKeyOps;
+};
+
+void afterOpMinimize( FsmAp *fsm, bool lastInSeq = true );
+Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd );
+Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd );
+Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd );
+Key makeFsmKeyChar( char c, ParseData *pd );
+void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd );
+void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
+ bool caseInsensitive, ParseData *pd );
+FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd );
+FsmAp *dotFsm( ParseData *pd );
+FsmAp *dotStarFsm( ParseData *pd );
+
+void errorStateLabels( const NameSet &locations );
+
+/* Data used by the parser specific to the current file. Supports the include
+ * system, since a new parser is executed for each included file. */
+struct InputData
+{
+ /* Positions start at line 1, column 1 (last_column at 0) and the input
+ * starts out active. sectionLoc is not set here. */
+ InputData( char *fileName, char *includeSpec, char *includeTo ) :
+ pd(0), sectionName(0), defaultParseData(0),
+ first_line(1), first_column(1),
+ last_line(1), last_column(0),
+ fileName(fileName), includeSpec(includeSpec),
+ includeTo(includeTo), active(true)
+ {}
+
+ /* For collecting name references. */
+ NameRef nameRef;
+ NameRefList nameRefList;
+
+ /* The parse data. For each fsm spec, the parser collects things that it parses
+ * in data structures in here. */
+ ParseData *pd;
+
+ char *sectionName;
+ ParseData *defaultParseData;
+
+ int first_line;
+ int first_column;
+ int last_line;
+ int last_column;
+
+ char *fileName;
+
+ /* If this is an included file, this contains the specification to search
+ * for. IncludeTo will contain the spec name that does the including. */
+ char *includeSpec;
+ char *includeTo;
+
+ bool active;
+ InputLoc sectionLoc;
+};
+
+struct Parser;
+
+typedef AvlMap<char*, Parser *, CmpStr> ParserDict;
+typedef AvlMapEl<char*, Parser *> ParserDictEl;
+
+extern ParserDict parserDict;
+
+
+#endif /* _PARSEDATA_H */
diff --git a/ragel/parsetree.cpp b/ragel/parsetree.cpp
new file mode 100644
index 0000000..11c58fa
--- /dev/null
+++ b/ragel/parsetree.cpp
@@ -0,0 +1,2111 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <iostream>
+#include <iomanip>
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+
+/* Parsing. */
+#include "ragel.h"
+#include "rlparse.h"
+#include "parsetree.h"
+
+using namespace std;
+ostream &operator<<( ostream &out, const NameRef &nameRef );
+ostream &operator<<( ostream &out, const NameInst &nameInst );
+
+/* Decode a quoted literal token from the scanner into result. The opening
+ * quote is skipped, option characters trailing the closing quote are consumed
+ * (only 'i', for case insensitivity, is recognised; anything else is reported
+ * as an error) and backslash escapes are translated. The output is null
+ * terminated, but since a literal may itself contain \0 the parser must rely
+ * on result.length rather than on the terminator. */
+void Token::prepareLitString( Token &result, bool &caseInsensitive )
+{
+    result.data = new char[this->length+1];
+    caseInsensitive = false;
+
+    char *in = this->data + 1;
+    char *close = this->data + this->length - 1;
+
+    /* Back up from the last character, over the options, to the closing
+     * quote. */
+    for ( ; *close != '\'' && *close != '\"'; close -= 1 ) {
+        if ( *close == 'i' )
+            caseInsensitive = true;
+        else {
+            error( this->loc ) << "literal string '" << *close <<
+                    "' option not supported" << endl;
+        }
+    }
+
+    char *out = result.data;
+    int n = 0;
+    while ( in != close ) {
+        /* Ordinary characters are copied straight through. */
+        if ( *in != '\\' ) {
+            out[n++] = *in++;
+            continue;
+        }
+
+        /* An escape always consumes the backslash and the char after it. */
+        char esc = in[1];
+        in += 2;
+        switch ( esc ) {
+            /* Backslash-newline contributes nothing to the output. */
+            case '\n': break;
+            case '0': out[n++] = '\0'; break;
+            case 'a': out[n++] = '\a'; break;
+            case 'b': out[n++] = '\b'; break;
+            case 't': out[n++] = '\t'; break;
+            case 'n': out[n++] = '\n'; break;
+            case 'v': out[n++] = '\v'; break;
+            case 'f': out[n++] = '\f'; break;
+            case 'r': out[n++] = '\r'; break;
+            /* Any other escaped character stands for itself. */
+            default: out[n++] = esc; break;
+        }
+    }
+
+    result.length = n;
+    result.data[result.length] = 0;
+}
+
+
+/* Build the machine for this variable definition inside its own name scope
+ * and return it. */
+FsmAp *VarDef::walk( ParseData *pd )
+{
+    NameFrame frame = pd->enterNameScope( true, 1 );
+
+    /* Construct the machine for the defining expression. */
+    FsmAp *fsm = joinOrLm->walk( pd );
+
+    /* Local error actions recorded under this definition's name are
+     * transferred on every state of the result. */
+    LocalErrDictEl *errEl = pd->localErrDict.find( name );
+    if ( errEl != 0 ) {
+        for ( StateList::Iter st = fsm->stateList; st.lte(); st++ )
+            fsm->transferErrorActions( st, errEl->value );
+    }
+
+    /* A join of several expressions has already had its epsilon transitions
+     * resolved. A join holding a single expression has not, so do it here. */
+    bool singleExprJoin = joinOrLm->type == JoinOrLm::JoinType &&
+            joinOrLm->join->exprList.length() == 1;
+    if ( singleExprJoin )
+        fsm->epsilonOp();
+
+    /* Entry points that are no longer needed can go. */
+    pd->unsetObsoleteEntries( fsm );
+
+    /* When something refers to this definition by name, its start state
+     * becomes an entry point of the graph. */
+    if ( pd->curNameInst->numRefs > 0 )
+        fsm->setEntry( pd->curNameInst->id, fsm->startState );
+
+    pd->popNameScope( frame );
+    return fsm;
+}
+
+/* Add a named scope for this definition to the name tree, build the names of
+ * the defining expression inside it, then restore the enclosing scope. */
+void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd )
+{
+    NameInst *enclosing = pd->curNameInst;
+    pd->curNameInst = pd->addNameInst( loc, name, false );
+
+    /* Flag the scope when the definition is a longest-match construct. */
+    bool isLm = joinOrLm->type == JoinOrLm::LongestMatchType;
+    if ( isLm )
+        pd->curNameInst->isLongestMatch = true;
+
+    joinOrLm->makeNameTree( pd );
+
+    /* Leave the scope again. */
+    pd->curNameInst = enclosing;
+}
+
+/* Resolve the name references of the defining expression from within this
+ * definition's name scope. */
+void VarDef::resolveNameRefs( ParseData *pd )
+{
+    NameFrame frame = pd->enterNameScope( true, 1 );
+    joinOrLm->resolveNameRefs( pd );
+    pd->popNameScope( frame );
+}
+
+/* Location of this part: that of its action when there is one, otherwise
+ * semiLoc. */
+InputLoc LongestMatchPart::getLoc()
+{
+    if ( action != 0 )
+        return action->loc;
+    return semiLoc;
+}
+
+/*
+ * If there are any LMs then all of the following entry points must reset
+ * tokstart:
+ *
+ * 1. fentry(StateRef)
+ * 2. fgoto(StateRef), fcall(StateRef), fnext(StateRef)
+ * 3. target of any transition that has an fcall (the return loc).
+ * 4. start state of all longest match routines.
+ */
+
+/* Allocate an action for use by the longest-match machinery. The action takes
+ * a reference from the current name scope, is appended to the parse data's
+ * action list and is flagged as a longest-match action. */
+Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc,
+        char *name, InlineList *inlineList )
+{
+    Action *lmAction = new Action( loc, name, inlineList );
+
+    lmAction->actionRefs.append( pd->curNameInst );
+    pd->actionList.append( lmAction );
+    lmAction->isLmAction = true;
+
+    return lmAction;
+}
+
+/* Create the generated actions of the longest match. Every part receives
+ * four actions, each wrapping a single inline item, and the construct as a
+ * whole receives one select action. The passes are kept separate so that the
+ * actions are appended to the action list grouped by kind. */
+void LongestMatch::makeActions( ParseData *pd )
+{
+    /* "store<id>": records which part matched (LmSetActId). */
+    for ( LmPartList::Iter part = *longestMatchList; part.lte(); part++ ) {
+        char *label = new char[50];
+        sprintf( label, "store%i", part->longestMatchId );
+
+        InlineList *body = new InlineList;
+        body->append( new InlineItem( part->getLoc(), this, part,
+                InlineItem::LmSetActId ) );
+        part->setActId = newAction( pd, part->getLoc(), label, body );
+    }
+
+    /* "imm<id>": run the user action and restart on the last character
+     * (LmOnLast). */
+    for ( LmPartList::Iter part = *longestMatchList; part.lte(); part++ ) {
+        char *label = new char[50];
+        sprintf( label, "imm%i", part->longestMatchId );
+
+        InlineList *body = new InlineList;
+        body->append( new InlineItem( part->getLoc(), this, part,
+                InlineItem::LmOnLast ) );
+        part->actOnLast = newAction( pd, part->getLoc(), label, body );
+    }
+
+    /* "lagh<id>": run the user action and restart on the next character.
+     * These set tokend themselves, it being the current char (LmOnNext). */
+    for ( LmPartList::Iter part = *longestMatchList; part.lte(); part++ ) {
+        char *label = new char[50];
+        sprintf( label, "lagh%i", part->longestMatchId );
+
+        InlineList *body = new InlineList;
+        body->append( new InlineItem( part->getLoc(), this, part,
+                InlineItem::LmOnNext ) );
+        part->actOnNext = newAction( pd, part->getLoc(), label, body );
+    }
+
+    /* "lag<id>": run the user action and restart at tokend, some time after
+     * the last char was matched (LmOnLagBehind). */
+    for ( LmPartList::Iter part = *longestMatchList; part.lte(); part++ ) {
+        char *label = new char[50];
+        sprintf( label, "lag%i", part->longestMatchId );
+
+        InlineList *body = new InlineList;
+        body->append( new InlineItem( part->getLoc(), this, part,
+                InlineItem::LmOnLagBehind ) );
+        part->actLagBehind = newAction( pd, part->getLoc(), label, body );
+    }
+
+    /* "lagsel": the select action (LmSwitch). It belongs to no single part,
+     * so it is given a fixed 1:1 location. */
+    InputLoc origin;
+    origin.line = 1;
+    origin.col = 1;
+
+    InlineList *selectBody = new InlineList;
+    selectBody->append( new InlineItem( origin, this, 0, InlineItem::LmSwitch ) );
+    lmActSelect = newAction( pd, origin, "lagsel", selectBody );
+}
+
+/* Give the longest match the name of the nearest named scope, searching from
+ * the current name instantiation up through its parents. */
+void LongestMatch::findName( ParseData *pd )
+{
+    NameInst *scope = pd->curNameInst;
+    for ( ; scope->name == 0; ) {
+        scope = scope->parent;
+        /* Every machine has a name, so the search cannot run off the top of
+         * the tree. */
+        assert( scope != 0 );
+    }
+    name = scope->name;
+}
+
+/* Add the longest match to the name tree: open an anonymous scope (used for
+ * restarting the machine after a token is matched), build the names of every
+ * part, pick a name for the construct and create its actions. */
+void LongestMatch::makeNameTree( ParseData *pd )
+{
+    NameInst *enclosing = pd->curNameInst;
+    pd->curNameInst = pd->addNameInst( loc, 0, false );
+
+    /* Descend into the join of each part. */
+    for ( LmPartList::Iter part = *longestMatchList; part.lte(); part++ )
+        part->join->makeNameTree( pd );
+
+    /* Look upwards in the name tree for a name to use. */
+    findName( pd );
+
+    /* This is also the point at which the generated actions are made. */
+    makeActions( pd );
+
+    /* Leave the anonymous scope. */
+    pd->curNameInst = enclosing;
+}
+
+/* Resolve name references in every part from within the longest match's own
+ * name scope. Parts that carry an action also record a reference from the
+ * local name scope on that action. */
+void LongestMatch::resolveNameRefs( ParseData *pd )
+{
+    NameFrame frame = pd->enterNameScope( true, 1 );
+
+    for ( LmPartList::Iter part = *longestMatchList; part.lte(); part++ ) {
+        /* Only parts with an action have a reference to record. */
+        if ( part->action != 0 )
+            part->action->actionRefs.append( pd->localNameScope );
+
+        /* Descend into the part's join. */
+        part->join->resolveNameRefs( pd );
+    }
+
+    pd->popNameScope( frame );
+}
+
+/* Retarget trans so that it leads from its original source state to the
+ * start state of graph. */
+void LongestMatch::restart( FsmAp *graph, TransAp *trans )
+{
+    /* Remember the source before detaching the transition. */
+    StateAp *source = trans->fromState;
+
+    graph->detachTrans( source, trans->toState, trans );
+    graph->attachTrans( source, graph->startState, trans );
+}
+
+/* Turn the union of the longest-match parts (graph) into a scanner. In order:
+ * 1. fill each state's lmItemSet with the parts that may be matched there,
+ * 2. put the token-start (and, if needed, act-initialising) actions on the
+ *    start state,
+ * 3. put the immediate, tokend-setting and act-id-setting actions on the
+ *    transitions that carry an lmActionTable entry,
+ * 4. redirect the immediately matching transitions to the start state,
+ * 5. give states with a non-trivial item set an error target back at the
+ *    start state, and
+ * 6. make the start state final. */
+void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph )
+{
+ /* The states marked by a search from the start state get the null item:
+ * nothing has been matched there yet. */
+ graph->markReachableFromHereStopFinal( graph->startState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & SB_ISMARKED ) {
+ ms->lmItemSet.insert( 0 );
+ ms->stateBits &= ~ SB_ISMARKED;
+ }
+ }
+
+ /* Transfer the first item of non-empty lmAction tables to the item sets
+ * of the states that follow. Exclude states that have no transitions out.
+ * This must happen on a separate pass so that on each iteration of the
+ * next pass we have the item set entries from all lmAction tables. */
+ for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+ for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
+ if ( trans->lmActionTable.length() > 0 ) {
+ LmActionTableEl *lmAct = trans->lmActionTable.data;
+ StateAp *toState = trans->toState;
+ assert( toState );
+
+ /* Check if there are transitions out, this may be a very
+ * close approximation? Out transitions going nowhere?
+ * FIXME: Check. */
+ if ( toState->outList.length() > 0 ) {
+ /* Fill the item sets. */
+ graph->markReachableFromHereStopFinal( toState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & SB_ISMARKED ) {
+ ms->lmItemSet.insert( lmAct->value );
+ ms->stateBits &= ~ SB_ISMARKED;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /* The lmItem sets are now filled, telling us which longest match rules
+ * can succeed in which states. First determine if we need to make sure
+ * act is defaulted to zero. We need to do this if there are any states
+ * with lmItemSet.length() > 1 and NULL is included. That is, that the
+ * switch may get called when in fact nothing has been matched. */
+ int maxItemSetLength = 0;
+ graph->markReachableFromHereStopFinal( graph->startState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & SB_ISMARKED ) {
+ if ( ms->lmItemSet.length() > maxItemSetLength )
+ maxItemSetLength = ms->lmItemSet.length();
+ ms->stateBits &= ~ SB_ISMARKED;
+ }
+ }
+
+ /* The actions executed on starting to match a token. */
+ graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
+ graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
+ if ( maxItemSetLength > 1 ) {
+ /* The longest match action switch may be called when no token has
+ * been matched (see above), in which case act must be initialized,
+ * there must be a case to handle the error, and the generated machine
+ * will require an error state. */
+ lmSwitchHandlesError = true;
+ pd->lmRequiresErrorState = true;
+ graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
+ }
+
+ /* The place to store transitions to restart. It may be possible for the
+ * restarting to affect the searching through the graph that follows. For
+ * now take the safe route and save the list of transitions to restart
+ * until after all searching is done. */
+ Vector<TransAp*> restartTrans;
+
+ /* Set actions that do immediate token recognition, set the longest match part
+ * id and set the token ending. */
+ for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+ for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
+ if ( trans->lmActionTable.length() > 0 ) {
+ LmActionTableEl *lmAct = trans->lmActionTable.data;
+ StateAp *toState = trans->toState;
+ assert( toState );
+
+ /* Check if there are transitions out, this may be a very
+ * close approximation? Out transitions going nowhere?
+ * FIXME: Check. */
+ if ( toState->outList.length() == 0 ) {
+ /* Can execute the immediate action for the longest match
+ * part. Redirect the action to the start state. */
+ trans->actionTable.setAction( lmAct->key,
+ lmAct->value->actOnLast );
+ restartTrans.append( trans );
+ }
+ else {
+ /* Look for non final states that have a non-empty item
+ * set. If these are present then we need to record the
+ * end of the token. Also find the highest item set
+ * length reachable from here (excluding at transitions to
+ * final states). Note that maxItemSetLength is reused
+ * and reset for each such transition. */
+ bool nonFinalNonEmptyItemSet = false;
+ maxItemSetLength = 0;
+ graph->markReachableFromHereStopFinal( toState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & SB_ISMARKED ) {
+ if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
+ nonFinalNonEmptyItemSet = true;
+ if ( ms->lmItemSet.length() > maxItemSetLength )
+ maxItemSetLength = ms->lmItemSet.length();
+ ms->stateBits &= ~ SB_ISMARKED;
+ }
+ }
+
+ /* If there are reachable states that are not final and
+ * have non empty item sets or that have an item set
+ * length greater than one then we need to set tokend
+ * because the error action that matches the token will
+ * require it. */
+ if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
+ trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
+
+ /* Some states may not know which longest match item to
+ * execute, must set it. */
+ if ( maxItemSetLength > 1 ) {
+ /* There are transitions out, another match may come. */
+ trans->actionTable.setAction( lmAct->key,
+ lmAct->value->setActId );
+ }
+ }
+ }
+ }
+ }
+
+ /* Now that all graph searching is done it is certainly safe to do the
+ * restarting. It may be safe above, however this must be verified. */
+ for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
+ restart( graph, *pt );
+
+ /* All error targets set below share this one action ordering. */
+ int lmErrActionOrd = pd->curActionOrd++;
+
+ /* Embed the error for recognizing a char. */
+ for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+ if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
+ if ( st->isFinState() ) {
+ /* On error execute the actOnNext action, which knows that
+ * the last character of the token was one back and restart. */
+ graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
+ &st->lmItemSet[0]->actOnNext, 1 );
+ }
+ else {
+ /* Not a final state: use the part's actLagBehind action
+ * instead. */
+ graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
+ &st->lmItemSet[0]->actLagBehind, 1 );
+ }
+ }
+ else if ( st->lmItemSet.length() > 1 ) {
+ /* Need to use the select. Take note of which items the select
+ * is needed for so only the necessary actions are included. */
+ for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
+ if ( *plmi != 0 )
+ (*plmi)->inLmSelect = true;
+ }
+ /* On error, execute the action select and go to the start state. */
+ graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
+ &lmActSelect, 1 );
+ }
+ }
+
+ /* Finally, the start state should be made final. */
+ graph->setFinState( graph->startState );
+}
+
+/* Build the machine for the whole longest-match construct: walk every part,
+ * union the results together and run the longest-match processing on the
+ * union. */
+FsmAp *LongestMatch::walk( ParseData *pd )
+{
+    /* The construct lives in a name scope of its own. */
+    NameFrame frame = pd->enterNameScope( true, 1 );
+
+    /* One machine per part, each with the longest-match action for its part
+     * embedded. */
+    FsmAp **machines = new FsmAp*[longestMatchList->length()];
+    int slot = 0;
+    for ( LmPartList::Iter part = *longestMatchList; part.lte(); part++, slot++ ) {
+        machines[slot] = part->join->walk( pd );
+        machines[slot]->longMatchAction( pd->curActionOrd++, part );
+    }
+
+    /* The grammar guarantees at least one part, so machine zero always
+     * exists. Fold the others into it. */
+    FsmAp *combined = machines[0];
+    for ( int m = 1; m < longestMatchList->length(); m++ ) {
+        combined->unionOp( machines[m] );
+        afterOpMinimize( combined );
+    }
+
+    runLonestMatch( pd, combined );
+
+    /* Leave the scope and drop the temporary array (not the machines). */
+    pd->popNameScope( frame );
+    delete[] machines;
+
+    return combined;
+}
+
+/* Build the machine of whichever alternative this node holds. Yields null if
+ * the type is neither of the two known kinds. */
+FsmAp *JoinOrLm::walk( ParseData *pd )
+{
+    if ( type == JoinType )
+        return join->walk( pd );
+    if ( type == LongestMatchType )
+        return longestMatch->walk( pd );
+    return 0;
+}
+
+/* Forward name tree construction to whichever alternative this node holds. */
+void JoinOrLm::makeNameTree( ParseData *pd )
+{
+    if ( type == JoinType )
+        join->makeNameTree( pd );
+    else if ( type == LongestMatchType )
+        longestMatch->makeNameTree( pd );
+}
+
+/* Forward name reference resolution to whichever alternative this node
+ * holds. */
+void JoinOrLm::resolveNameRefs( ParseData *pd )
+{
+    if ( type == JoinType )
+        join->resolveNameRefs( pd );
+    else if ( type == LongestMatchType )
+        longestMatch->resolveNameRefs( pd );
+}
+
+
+/* Begin a join located at loc whose expression list holds only expr. */
+Join::Join( const InputLoc &loc, Expression *expr )
+    : loc(loc)
+{
+    exprList.append( expr );
+}
+
+/* Construct with the first expression only; no location is supplied.
+ *
+ * The member used to be initialised as loc(loc). With no parameter of that
+ * name in scope this copied the member from itself, leaving it with an
+ * indeterminate value that later surfaces in error(loc) messages. The
+ * location is now value-initialised instead. */
+Join::Join( Expression *expr )
+:
+    loc()
+{
+    exprList.append( expr );
+}
+
+/* Build the machine for this node. A single expression is walked directly;
+ * only two or more expressions require an actual join. */
+FsmAp *Join::walk( ParseData *pd )
+{
+    if ( !( exprList.length() > 1 ) )
+        return exprList.head->walk( pd );
+    return walkJoin( pd );
+}
+
+/* Join the machines of all listed expressions into one, inside a name scope
+ * of its own. */
+FsmAp *Join::walkJoin( ParseData *pd )
+{
+    NameFrame frame = pd->enterNameScope( true, 1 );
+
+    /* Build one machine per expression, in list order. */
+    FsmAp **machines = new FsmAp*[exprList.length()];
+    int slot = 0;
+    for ( ExprList::Iter expr = exprList; slot < exprList.length(); slot++, expr++ )
+        machines[slot] = expr->walk( pd );
+
+    /* The scope always has a final name; a start name may be missing. */
+    NameInst *startName = pd->curNameInst->start;
+    NameInst *finalName = pd->curNameInst->final;
+
+    /* Having a start name means it is implicitly linked to; note that on the
+     * local scope. Without one the id stays at -1. */
+    int startId = -1;
+    if ( startName != 0 ) {
+        pd->localNameScope->referencedNames.append( startName );
+        startId = startName->id;
+    }
+
+    /* -1 here says that no epsilon refers to the final state, so neither a
+     * final state nor an entry point to it is to be made. */
+    int finalId = -1;
+    if ( finalName->numRefs > 0 )
+        finalId = finalName->id;
+
+    /* Machine zero absorbs all the others. */
+    FsmAp *joined = machines[0];
+    joined->joinOp( startId, finalId, machines+1, exprList.length()-1 );
+
+    /* Entry points that are no longer needed can go. */
+    pd->unsetObsoleteEntries( joined );
+
+    pd->popNameScope( frame );
+
+    delete[] machines;
+    return joined;
+}
+
+/* Add this node to the name tree. Only a real join (two or more expressions)
+ * opens an anonymous scope and gets the implicit "final" target. */
+void Join::makeNameTree( ParseData *pd )
+{
+    /* A lone expression adds no scope of its own. */
+    if ( !( exprList.length() > 1 ) ) {
+        exprList.head->makeNameTree( pd );
+        return;
+    }
+
+    NameInst *enclosing = pd->curNameInst;
+    pd->curNameInst = pd->addNameInst( loc, 0, false );
+
+    /* The implicit "final" name of the join scope. */
+    pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final",
+            pd->nextNameId++, false );
+
+    /* Descend into every expression of the join. */
+    for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
+        expr->makeNameTree( pd );
+
+    /* Leave the anonymous scope. */
+    pd->curNameInst = enclosing;
+}
+
+
+/* Resolve name references below this node. For a real join this also looks
+ * up the scope's "start" label, reporting an error when there are several or
+ * none. */
+void Join::resolveNameRefs( ParseData *pd )
+{
+    /* A lone expression is not a join: just descend. */
+    if ( !( exprList.length() > 1 ) ) {
+        exprList.head->resolveNameRefs( pd );
+        return;
+    }
+
+    NameFrame frame = pd->enterNameScope( true, 1 );
+
+    /* Look for start labels in the join scope and use the first one found. */
+    NameSet starts = pd->resolvePart( pd->localNameScope, "start", true );
+    if ( starts.length() > 0 ) {
+        pd->curNameInst->start = starts[0];
+        if ( starts.length() > 1 ) {
+            /* More than one is an error; list where they are. */
+            error(loc) << "multiple start labels" << endl;
+            errorStateLabels( starts );
+        }
+    }
+
+    if ( pd->curNameInst->start == 0 ) {
+        /* A missing start label is reported, after which processing simply
+         * carries on. */
+        error(loc) << "no start label" << endl;
+    }
+    else {
+        /* The join itself counts as one reference to the start name. */
+        pd->curNameInst->start->numRefs += 1;
+    }
+
+    /* Descend into every expression of the join. */
+    for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
+        expr->resolveNameRefs( pd );
+
+    pd->popNameScope( frame );
+}
+
+/* Free the children this node owns, which depends on the kind of node. */
+Expression::~Expression()
+{
+    switch ( type ) {
+        case BuiltinType:
+            /* Owns no children. */
+            break;
+        case TermType:
+            delete term;
+            break;
+        case OrType:
+        case IntersectType:
+        case SubtractType:
+        case StrongSubtractType:
+            /* Binary operators own both operands. */
+            delete expression;
+            delete term;
+            break;
+    }
+}
+
+/* Build the machine for this expression node. Binary operators build the
+ * left expression, then the right term, combine them and hand the result to
+ * afterOpMinimize together with lastInSeq. */
+FsmAp *Expression::walk( ParseData *pd, bool lastInSeq )
+{
+    FsmAp *result = 0;
+    switch ( type ) {
+        case TermType: {
+            /* Nothing to combine, the term is the result. */
+            result = term->walk( pd );
+            break;
+        }
+        case BuiltinType: {
+            /* A fresh copy of the builtin machine. */
+            result = makeBuiltin( builtin, pd );
+            break;
+        }
+        case OrType: {
+            /* Union. The left side is walked with lastInSeq false. */
+            result = expression->walk( pd, false );
+            FsmAp *right = term->walk( pd );
+            result->unionOp( right );
+            afterOpMinimize( result, lastInSeq );
+            break;
+        }
+        case IntersectType: {
+            /* Intersection. */
+            result = expression->walk( pd );
+            FsmAp *right = term->walk( pd );
+            result->intersectOp( right );
+            afterOpMinimize( result, lastInSeq );
+            break;
+        }
+        case SubtractType: {
+            /* Subtraction. */
+            result = expression->walk( pd );
+            FsmAp *right = term->walk( pd );
+            result->subtractOp( right );
+            afterOpMinimize( result, lastInSeq );
+            break;
+        }
+        case StrongSubtractType: {
+            result = expression->walk( pd );
+
+            /* The term is wrapped in any* on both sides before it is
+             * subtracted. */
+            FsmAp *padded = dotStarFsm( pd );
+            FsmAp *middle = term->walk( pd );
+            FsmAp *trailing = dotStarFsm( pd );
+            padded->concatOp( middle );
+            padded->concatOp( trailing );
+
+            result->subtractOp( padded );
+            afterOpMinimize( result, lastInSeq );
+            break;
+        }
+    }
+
+    return result;
+}
+
+/* Build the name tree below this node: both operands for a binary operator,
+ * the term for a plain term, nothing for a builtin. */
+void Expression::makeNameTree( ParseData *pd )
+{
+    switch ( type ) {
+        case BuiltinType:
+            break;
+        case TermType:
+            term->makeNameTree( pd );
+            break;
+        case OrType:
+        case IntersectType:
+        case SubtractType:
+        case StrongSubtractType:
+            expression->makeNameTree( pd );
+            term->makeNameTree( pd );
+            break;
+    }
+}
+
+/* Resolve name references below this node: both operands for a binary
+ * operator, the term for a plain term, nothing for a builtin. */
+void Expression::resolveNameRefs( ParseData *pd )
+{
+    switch ( type ) {
+        case BuiltinType:
+            break;
+        case TermType:
+            term->resolveNameRefs( pd );
+            break;
+        case OrType:
+        case IntersectType:
+        case SubtractType:
+        case StrongSubtractType:
+            expression->resolveNameRefs( pd );
+            term->resolveNameRefs( pd );
+            break;
+    }
+}
+
+/* Free the children this node owns, which depends on the kind of node. */
+Term::~Term()
+{
+    switch ( type ) {
+        case FactorWithAugType:
+            delete factorWithAug;
+            break;
+        case ConcatType:
+        case RightStartType:
+        case RightFinishType:
+        case LeftType:
+            /* Every concatenation variant owns both operands. */
+            delete term;
+            delete factorWithAug;
+            break;
+    }
+}
+
+/* Build the machine for this term node. All variants other than a bare
+ * factor concatenate the left term with the right factor. The three guarded
+ * variants first put priorities on both sides under one shared key. */
+FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
+{
+    FsmAp *result = 0;
+    switch ( type ) {
+        case FactorWithAugType: {
+            result = factorWithAug->walk( pd );
+            break;
+        }
+        case ConcatType: {
+            /* Plain concatenation. The left side is walked with lastInSeq
+             * false. */
+            result = term->walk( pd, false );
+            FsmAp *right = factorWithAug->walk( pd );
+            result->concatOp( right );
+            afterOpMinimize( result, lastInSeq );
+            break;
+        }
+        case RightStartType:
+        case RightFinishType:
+        case LeftType: {
+            result = term->walk( pd );
+            FsmAp *right = factorWithAug->walk( pd );
+
+            /* LeftType favours the left machine (1 over 0); the two right
+             * variants favour the right machine (0 on the left, 1 on the
+             * right). */
+            int leftPriority = ( type == LeftType ) ? 1 : 0;
+
+            /* All transitions of the left machine get its priority under a
+             * fresh key. */
+            priorDescs[0].key = pd->nextPriorKey++;
+            priorDescs[0].priority = leftPriority;
+            result->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
+
+            /* The right machine uses the same key with the opposite
+             * priority. */
+            priorDescs[1].key = priorDescs[0].key;
+            priorDescs[1].priority = 1 - leftPriority;
+            if ( type == RightStartType ) {
+                /* Only the start transitions of the right machine. */
+                right->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
+            }
+            else if ( type == RightFinishType ) {
+                /* Only the finishing transitions of the right machine. */
+                right->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
+            }
+            else {
+                /* LeftType: a start-transition priority might needlessly
+                 * add states during construction (due to isolation), so
+                 * allTransPrior is used; it has the same effect. */
+                right->allTransPrior( pd->curPriorOrd++, &priorDescs[1] );
+            }
+
+            result->concatOp( right );
+            afterOpMinimize( result, lastInSeq );
+            break;
+        }
+    }
+    return result;
+}
+
+/* Build the name tree below this node: both operands for a concatenation
+ * variant, just the factor otherwise. */
+void Term::makeNameTree( ParseData *pd )
+{
+    switch ( type ) {
+        case FactorWithAugType:
+            factorWithAug->makeNameTree( pd );
+            break;
+        case ConcatType:
+        case RightStartType:
+        case RightFinishType:
+        case LeftType:
+            term->makeNameTree( pd );
+            factorWithAug->makeNameTree( pd );
+            break;
+    }
+}
+
+/* Resolve name references below this node: both operands for a
+ * concatenation variant, just the factor otherwise. */
+void Term::resolveNameRefs( ParseData *pd )
+{
+    switch ( type ) {
+        case FactorWithAugType:
+            factorWithAug->resolveNameRefs( pd );
+            break;
+        case ConcatType:
+        case RightStartType:
+        case RightFinishType:
+        case LeftType:
+            term->resolveNameRefs( pd );
+            factorWithAug->resolveNameRefs( pd );
+            break;
+    }
+}
+
+/* Free the wrapped factor and the array of priority descriptors. */
+FactorWithAug::~FactorWithAug()
+{
+    delete factorWithRep;
+
+    /* Nothing is done here for the parser actions. */
+
+    /* delete[] on a null pointer does nothing, so no check is needed. */
+    delete[] priorDescs;
+}
+
+/* Embed every action augmentation of this factor into graph. actionOrd[i] is
+ * the ordering passed along with actions[i]. Each start-type embedding is
+ * followed by a call to afterOpMinimize. The global error actions pass 0
+ * where the local error actions pass the action's localErrKey. */
+void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd )
+{
+ /* Assign actions. */
+ for ( int i = 0; i < actions.length(); i++ ) {
+ switch ( actions[i].type ) {
+ /* Transition actions. */
+ case at_start:
+ graph->startFsmAction( actionOrd[i], actions[i].action );
+ afterOpMinimize( graph );
+ break;
+ case at_all:
+ graph->allTransAction( actionOrd[i], actions[i].action );
+ break;
+ case at_finish:
+ graph->finishFsmAction( actionOrd[i], actions[i].action );
+ break;
+ case at_leave:
+ graph->leaveFsmAction( actionOrd[i], actions[i].action );
+ break;
+
+ /* Global error actions. */
+ case at_start_gbl_error:
+ graph->startErrorAction( actionOrd[i], actions[i].action, 0 );
+ afterOpMinimize( graph );
+ break;
+ case at_all_gbl_error:
+ graph->allErrorAction( actionOrd[i], actions[i].action, 0 );
+ break;
+ case at_final_gbl_error:
+ graph->finalErrorAction( actionOrd[i], actions[i].action, 0 );
+ break;
+ case at_not_start_gbl_error:
+ graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 );
+ break;
+ case at_not_final_gbl_error:
+ graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 );
+ break;
+ case at_middle_gbl_error:
+ graph->middleErrorAction( actionOrd[i], actions[i].action, 0 );
+ break;
+
+ /* Local error actions. */
+ case at_start_local_error:
+ graph->startErrorAction( actionOrd[i], actions[i].action,
+ actions[i].localErrKey );
+ afterOpMinimize( graph );
+ break;
+ case at_all_local_error:
+ graph->allErrorAction( actionOrd[i], actions[i].action,
+ actions[i].localErrKey );
+ break;
+ case at_final_local_error:
+ graph->finalErrorAction( actionOrd[i], actions[i].action,
+ actions[i].localErrKey );
+ break;
+ case at_not_start_local_error:
+ graph->notStartErrorAction( actionOrd[i], actions[i].action,
+ actions[i].localErrKey );
+ break;
+ case at_not_final_local_error:
+ graph->notFinalErrorAction( actionOrd[i], actions[i].action,
+ actions[i].localErrKey );
+ break;
+ case at_middle_local_error:
+ graph->middleErrorAction( actionOrd[i], actions[i].action,
+ actions[i].localErrKey );
+ break;
+
+ /* EOF actions. */
+ case at_start_eof:
+ graph->startEOFAction( actionOrd[i], actions[i].action );
+ afterOpMinimize( graph );
+ break;
+ case at_all_eof:
+ graph->allEOFAction( actionOrd[i], actions[i].action );
+ break;
+ case at_final_eof:
+ graph->finalEOFAction( actionOrd[i], actions[i].action );
+ break;
+ case at_not_start_eof:
+ graph->notStartEOFAction( actionOrd[i], actions[i].action );
+ break;
+ case at_not_final_eof:
+ graph->notFinalEOFAction( actionOrd[i], actions[i].action );
+ break;
+ case at_middle_eof:
+ graph->middleEOFAction( actionOrd[i], actions[i].action );
+ break;
+
+ /* To State Actions. */
+ case at_start_to_state:
+ graph->startToStateAction( actionOrd[i], actions[i].action );
+ afterOpMinimize( graph );
+ break;
+ case at_all_to_state:
+ graph->allToStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_final_to_state:
+ graph->finalToStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_not_start_to_state:
+ graph->notStartToStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_not_final_to_state:
+ graph->notFinalToStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_middle_to_state:
+ graph->middleToStateAction( actionOrd[i], actions[i].action );
+ break;
+
+ /* From State Actions. */
+ case at_start_from_state:
+ graph->startFromStateAction( actionOrd[i], actions[i].action );
+ afterOpMinimize( graph );
+ break;
+ case at_all_from_state:
+ graph->allFromStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_final_from_state:
+ graph->finalFromStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_not_start_from_state:
+ graph->notStartFromStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_not_final_from_state:
+ graph->notFinalFromStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_middle_from_state:
+ graph->middleFromStateAction( actionOrd[i], actions[i].action );
+ break;
+
+ /* Remaining cases, prevented by the parser. Reaching this is treated as
+ * a programming error. */
+ default:
+ assert( false );
+ break;
+ }
+ }
+}
+
+void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd )
+{
+	/* Embed every priority augmentation of this node into the machine.
+	 * Slot p of priorOrd and of priorDescs belongs to priorityAugs[p]. */
+	const int numPriors = priorityAugs.length();
+	for ( int p = 0; p < numPriors; p++ ) {
+		switch ( priorityAugs[p].type ) {
+		case at_start: {
+			graph->startFsmPrior( priorOrd[p], &priorDescs[p] );
+			/* Setting start priorities is the one operation here that
+			 * may leave the machine in need of minimization. */
+			afterOpMinimize( graph );
+			break;
+		}
+		case at_all: {
+			graph->allTransPrior( priorOrd[p], &priorDescs[p] );
+			break;
+		}
+		case at_finish: {
+			graph->finishFsmPrior( priorOrd[p], &priorDescs[p] );
+			break;
+		}
+		case at_leave: {
+			graph->leaveFsmPrior( priorOrd[p], &priorDescs[p] );
+			break;
+		}
+		default: {
+			/* Any other augmentation type is rejected by the parser, so
+			 * there is nothing to do. */
+			break;
+		}
+		}
+	}
+}
+
+void FactorWithAug::assignConditions( FsmAp *graph )
+{
+	/* Embed each condition of this node at the location named by its
+	 * augmentation type. Types other than start/all/leave are ignored. */
+	const int numConds = conditions.length();
+	for ( int c = 0; c < numConds; c++ ) {
+		switch ( conditions[c].type ) {
+		case at_start: {
+			graph->startFsmCondition( conditions[c].action );
+			/* Only the start embedding is followed by a minimization. */
+			afterOpMinimize( graph );
+			break;
+		}
+		case at_all: {
+			graph->allTransCondition( conditions[c].action );
+			break;
+		}
+		case at_leave: {
+			graph->leaveFsmCondition( conditions[c].action );
+			break;
+		}
+		default: {
+			break;
+		}
+		}
+	}
+}
+
+
+/* Evaluate a factor with augmentation node. */
+FsmAp *FactorWithAug::walk( ParseData *pd )
+{
+ /* Builds the machine for the wrapped factorWithRep and then applies this
+ * node's augmentations to it, in this order: conditions, actions,
+ * priorities, epsilon transitions, label entry points. Returns the
+ * augmented machine. The ordering counters in pd (curActionOrd,
+ * curPriorOrd, nextEpsilonResolvedLink) are advanced as a side effect. */
+
+ /* Enter into the scopes created for the labels. */
+ NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
+
+ /* Make the array of function orderings. */
+ /* Left null when there are no actions; the loops below then do not run. */
+ int *actionOrd = 0;
+ if ( actions.length() > 0 )
+ actionOrd = new int[actions.length()];
+
+ /* First walk the list of actions, assigning order to all starting
+ * actions. */
+ /* These are numbered BEFORE the sub-machine is walked, so they receive
+ * lower ordering values than anything numbered during that walk. */
+ for ( int i = 0; i < actions.length(); i++ ) {
+ if ( actions[i].type == at_start ||
+ actions[i].type == at_start_gbl_error ||
+ actions[i].type == at_start_local_error ||
+ actions[i].type == at_start_to_state ||
+ actions[i].type == at_start_from_state ||
+ actions[i].type == at_start_eof )
+ actionOrd[i] = pd->curActionOrd++;
+ }
+
+ /* Evaluate the factor with repetition. */
+ FsmAp *rtnVal = factorWithRep->walk( pd );
+
+ /* Compute the remaining action orderings. */
+ /* Exact complement of the test above: every non-start action is numbered
+ * AFTER the sub-machine walk. */
+ for ( int i = 0; i < actions.length(); i++ ) {
+ if ( actions[i].type != at_start &&
+ actions[i].type != at_start_gbl_error &&
+ actions[i].type != at_start_local_error &&
+ actions[i].type != at_start_to_state &&
+ actions[i].type != at_start_from_state &&
+ actions[i].type != at_start_eof )
+ actionOrd[i] = pd->curActionOrd++;
+ }
+
+ assignConditions( rtnVal );
+
+ assignActions( pd, rtnVal , actionOrd );
+
+ /* Make the array of priority orderings. Orderings are local to this walk
+ * of the factor with augmentation. */
+ int *priorOrd = 0;
+ if ( priorityAugs.length() > 0 )
+ priorOrd = new int[priorityAugs.length()];
+
+ /* Walk all priorities, assigning the priority ordering. */
+ for ( int i = 0; i < priorityAugs.length(); i++ )
+ priorOrd[i] = pd->curPriorOrd++;
+
+ /* If the priority descriptors have not been made, make them now. Make
+ * priority descriptors for each priority assignment that will be passed to
+ * the fsm. Used to keep track of the key, value and used bit. */
+ /* Unlike priorOrd, priorDescs is a member: it is created on the first
+ * walk only and is not freed in this function. */
+ if ( priorDescs == 0 && priorityAugs.length() > 0 ) {
+ priorDescs = new PriorDesc[priorityAugs.length()];
+ for ( int i = 0; i < priorityAugs.length(); i++ ) {
+ /* Init the prior descriptor for the priority setting. */
+ priorDescs[i].key = priorityAugs[i].priorKey;
+ priorDescs[i].priority = priorityAugs[i].priorValue;
+ }
+ }
+
+ /* Assign priorities into the machine. */
+ assignPriorities( rtnVal, priorOrd );
+
+ /* Assign epsilon transitions. */
+ for ( int e = 0; e < epsilonLinks.length(); e++ ) {
+ /* Get the name, which may not exist. If it doesn't then silently
+ * ignore it because an error has already been reported. */
+ /* One slot of pd->epsilonResolvedLinks is consumed per epsilon link,
+ * whether or not it resolved; resolveNameRefs() appends them in the
+ * same order. */
+ NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
+ if ( epTarg != 0 ) {
+ /* Make the epsilon transitions. */
+ rtnVal->epsilonTrans( epTarg->id );
+
+ /* Note that we have made a link to the name. */
+ pd->localNameScope->referencedNames.append( epTarg );
+ }
+ }
+
+ /* Set entry points for labels. */
+ if ( labels.length() > 0 ) {
+ /* Pop the names. */
+ pd->resetNameScope( nameFrame );
+
+ /* Make labels that are referenced into entry points. */
+ /* Each iteration steps one label scope deeper, so curNameInst is the
+ * name instance of the i-th label. */
+ for ( int i = 0; i < labels.length(); i++ ) {
+ pd->enterNameScope( false, 1 );
+
+ /* Will always be found. */
+ NameInst *name = pd->curNameInst;
+
+ /* If the name is referenced then set the entry point. */
+ if ( name->numRefs > 0 )
+ rtnVal->setEntry( name->id, rtnVal->startState );
+ }
+
+ pd->popNameScope( nameFrame );
+ }
+
+ /* Release the per-walk ordering arrays. */
+ if ( priorOrd != 0 )
+ delete[] priorOrd;
+ if ( actionOrd != 0 )
+ delete[] actionOrd;
+ return rtnVal;
+}
+
+void FactorWithAug::makeNameTree( ParseData *pd )
+{
+	/* Remember the current name instance so it can be restored once the
+	 * subtree has been processed. */
+	NameInst *savedNameInst = pd->curNameInst;
+
+	/* Every label is added to the tree of instantiated names and becomes
+	 * the current name instance, so each label makes a new scope. */
+	const int numLabels = labels.length();
+	for ( int lbl = 0; lbl < numLabels; lbl++ ) {
+		NameInst *labelInst = pd->addNameInst( labels[lbl].loc,
+				labels[lbl].data, true );
+		pd->curNameInst = labelInst;
+	}
+
+	/* Build the names of the wrapped node inside those scopes, then pop
+	 * back out to where we started. */
+	factorWithRep->makeNameTree( pd );
+	pd->curNameInst = savedNameInst;
+}
+
+
+void FactorWithAug::resolveNameRefs( ParseData *pd )
+{
+	/* Step into the name scope opened by this node's labels. */
+	NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
+
+	/* Record, on every embedded action, the scope it is referenced from. */
+	for ( int a = 0; a < actions.length(); a++ )
+		actions[a].action->actionRefs.append( pd->localNameScope );
+
+	/* Descend before handling our own epsilon links. IMPORTANT: this must
+	 * be exactly the traversal order used when the machine is built. */
+	factorWithRep->resolveNameRefs( pd );
+
+	/* Resolve each epsilon link to a name instance (or to nothing). */
+	for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) {
+		EpsilonLink &link = epsilonLinks[ep];
+		NameInst *target = 0;
+
+		/* A single-component reference spelled "final" names the implicit
+		 * final state, which is only available in join operations. */
+		const bool toImplicitFinal = link.target.length() == 1 &&
+				strcmp( link.target.data[0], "final" ) == 0;
+
+		if ( toImplicitFinal ) {
+			target = pd->localNameScope->final;
+		}
+		else {
+			/* Search for the name starting from the local scope. */
+			NameSet candidates;
+			pd->resolveFrom( candidates, pd->localNameScope, link.target, 0 );
+			if ( candidates.length() >= 1 ) {
+				/* The first candidate wins; more than one is an error. */
+				target = candidates[0];
+				if ( candidates.length() >= 2 ) {
+					error(link.loc) << "state reference " << link.target <<
+							" resolves to multiple entry points" << endl;
+					errorStateLabels( candidates );
+				}
+			}
+		}
+
+		/* Resolutions are stored in one long vector in the parse data.
+		 * Name resolution and graph generation walk the parse tree
+		 * identically, so the graph walk finds each result at the matching
+		 * position. Unresolved links are stored too, as null. */
+		pd->epsilonResolvedLinks.append( target );
+
+		if ( target == 0 ) {
+			/* No recovery needed: the epsilon op skips links whose names
+			 * did not resolve. */
+			error(link.loc) << "could not resolve label " << link.target << endl;
+		}
+		else {
+			/* Count the reference against the resolved name. */
+			target->numRefs += 1;
+		}
+	}
+
+	/* Leave the label scope, if one was entered. */
+	if ( labels.length() > 0 )
+		pd->popNameScope( nameFrame );
+}
+
+
+/* Clean up after a factor with repetition node. */
+FactorWithRep::~FactorWithRep()
+{
+	/* Delete the child that corresponds to this node's type: the plain
+	 * pass-through node holds a factorWithNeg, every repetition form holds
+	 * a nested factorWithRep. */
+	switch ( type ) {
+	case FactorWithNegType:
+		delete factorWithNeg;
+		break;
+	case StarType:
+	case StarStarType:
+	case OptionalType:
+	case PlusType:
+	case ExactType:
+	case MaxType:
+	case MinType:
+	case RangeType:
+		delete factorWithRep;
+		break;
+	}
+}
+
+/* Evaluate a factor with repetition node. */
+FsmAp *FactorWithRep::walk( ParseData *pd )
+{
+	/* Builds the machine for this node: the sub-machine with the requested
+	 * repetition operator applied, or the factorWithNeg machine unchanged
+	 * for FactorWithNegType. Returns a newly built machine; yields the
+	 * lambda (zero-length) machine when the repetition count is zero or the
+	 * range is invalid.
+	 *
+	 * NOTE(review): the zero-repetition and invalid-range paths return
+	 * without walking factorWithRep, while resolveNameRefs() always
+	 * descends. Confirm this cannot leave pd->nextEpsilonResolvedLink out
+	 * of step when the skipped subtree contains epsilon links. */
+	FsmAp *retFsm = 0;
+
+	switch ( type ) {
+	case StarType: {
+		/* Evaluate the FactorWithRep. */
+		retFsm = factorWithRep->walk( pd );
+		if ( retFsm->startState->isFinState() ) {
+			warning(loc) << "applying kleene star to a machine that "
+					"accepts zero length word" << endl;
+		}
+
+		/* Shift over the start action orders then do the kleene star. */
+		pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+		retFsm->starOp( );
+		afterOpMinimize( retFsm );
+		break;
+	}
+	case StarStarType: {
+		/* Evaluate the FactorWithRep. */
+		retFsm = factorWithRep->walk( pd );
+		if ( retFsm->startState->isFinState() ) {
+			warning(loc) << "applying kleene star to a machine that "
+					"accepts zero length word" << endl;
+		}
+
+		/* Set up the prior descs. All gets priority one, whereas leaving gets
+		 * priority zero. Make a unique key so that these priorities don't
+		 * interfere with any priorities set by the user. */
+		priorDescs[0].key = pd->nextPriorKey++;
+		priorDescs[0].priority = 1;
+		retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
+
+		/* Leaving gets priority 0. Use same unique key. */
+		priorDescs[1].key = priorDescs[0].key;
+		priorDescs[1].priority = 0;
+		retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
+
+		/* Shift over the start action orders then do the kleene star. */
+		pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+		retFsm->starOp( );
+		afterOpMinimize( retFsm );
+		break;
+	}
+	case OptionalType: {
+		/* Make the null fsm. */
+		FsmAp *nu = new FsmAp();
+		nu->lambdaFsm( );
+
+		/* Evaluate the FactorWithRep. */
+		retFsm = factorWithRep->walk( pd );
+
+		/* Perform the question operator. */
+		retFsm->unionOp( nu );
+		afterOpMinimize( retFsm );
+		break;
+	}
+	case PlusType: {
+		/* Evaluate the FactorWithRep. */
+		retFsm = factorWithRep->walk( pd );
+		if ( retFsm->startState->isFinState() ) {
+			warning(loc) << "applying plus operator to a machine that "
+					"accepts zero length word" << endl;
+		}
+
+		/* Need a duplicate for the star end. */
+		FsmAp *dup = new FsmAp( *retFsm );
+
+		/* The start func orders need to be shifted before doing the star. */
+		pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
+
+		/* Star the duplicate. */
+		dup->starOp( );
+		afterOpMinimize( dup );
+
+		retFsm->concatOp( dup );
+		afterOpMinimize( retFsm );
+		break;
+	}
+	case ExactType: {
+		/* Get an int from the repetition amount. */
+		if ( lowerRep == 0 ) {
+			/* No copies. Don't need to evaluate the factorWithRep.
+			 * This defeats the purpose so give a warning. */
+			warning(loc) << "exactly zero repetitions results "
+					"in the null machine" << endl;
+
+			retFsm = new FsmAp();
+			retFsm->lambdaFsm();
+		}
+		else {
+			/* Evaluate the first FactorWithRep. */
+			retFsm = factorWithRep->walk( pd );
+			if ( retFsm->startState->isFinState() ) {
+				warning(loc) << "applying repetition to a machine that "
+						"accepts zero length word" << endl;
+			}
+
+			/* The start func orders need to be shifted before doing the
+			 * repetition. */
+			pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+			/* Do the repetition on the machine. Already guarded against n == 0 */
+			retFsm->repeatOp( lowerRep );
+			afterOpMinimize( retFsm );
+		}
+		break;
+	}
+	case MaxType: {
+		/* Get an int from the repetition amount. */
+		if ( upperRep == 0 ) {
+			/* No copies. Don't need to evaluate the factorWithRep.
+			 * This defeats the purpose so give a warning. */
+			warning(loc) << "max zero repetitions results "
+					"in the null machine" << endl;
+
+			retFsm = new FsmAp();
+			retFsm->lambdaFsm();
+		}
+		else {
+			/* Evaluate the first FactorWithRep. */
+			retFsm = factorWithRep->walk( pd );
+			if ( retFsm->startState->isFinState() ) {
+				warning(loc) << "applying max repetition to a machine that "
+						"accepts zero length word" << endl;
+			}
+
+			/* The start func orders need to be shifted before doing the
+			 * repetition. */
+			pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+			/* Do the repetition on the machine. Already guarded against n == 0 */
+			retFsm->optionalRepeatOp( upperRep );
+			afterOpMinimize( retFsm );
+		}
+		break;
+	}
+	case MinType: {
+		/* Evaluate the repeated machine. */
+		retFsm = factorWithRep->walk( pd );
+		if ( retFsm->startState->isFinState() ) {
+			warning(loc) << "applying min repetition to a machine that "
+					"accepts zero length word" << endl;
+		}
+
+		/* The start func orders need to be shifted before doing the repetition
+		 * and the kleene star. */
+		pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+		if ( lowerRep == 0 ) {
+			/* Acts just like a star op on the machine to return. */
+			retFsm->starOp( );
+			afterOpMinimize( retFsm );
+		}
+		else {
+			/* Take a duplicate for the plus. */
+			FsmAp *dup = new FsmAp( *retFsm );
+
+			/* Do repetition on the first half. */
+			retFsm->repeatOp( lowerRep );
+			afterOpMinimize( retFsm );
+
+			/* Star the duplicate. */
+			dup->starOp( );
+			afterOpMinimize( dup );
+
+			/* Tack on the kleene star. */
+			retFsm->concatOp( dup );
+			afterOpMinimize( retFsm );
+		}
+		break;
+	}
+	case RangeType: {
+		/* Check for bogus range. Compare the bounds directly rather than
+		 * testing the sign of their difference: the subtraction can
+		 * overflow, and is never negative if the counts are unsigned. */
+		if ( upperRep < lowerRep ) {
+			error(loc) << "invalid range repetition" << endl;
+
+			/* Return null machine as recovery. */
+			retFsm = new FsmAp();
+			retFsm->lambdaFsm();
+		}
+		else if ( lowerRep == 0 && upperRep == 0 ) {
+			/* No copies. Don't need to evaluate the factorWithRep. This
+			 * defeats the purpose so give a warning. */
+			warning(loc) << "zero to zero repetitions results "
+					"in the null machine" << endl;
+
+			retFsm = new FsmAp();
+			retFsm->lambdaFsm();
+		}
+		else {
+			/* Now need to evaluate the repeated machine. */
+			retFsm = factorWithRep->walk( pd );
+			if ( retFsm->startState->isFinState() ) {
+				warning(loc) << "applying range repetition to a machine that "
+						"accepts zero length word" << endl;
+			}
+
+			/* The start func orders need to be shifted before doing both kinds
+			 * of repetition. */
+			pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+			if ( lowerRep == 0 ) {
+				/* Just doing max repetition. Already guarded against n == 0. */
+				retFsm->optionalRepeatOp( upperRep );
+				afterOpMinimize( retFsm );
+			}
+			else if ( lowerRep == upperRep ) {
+				/* Just doing exact repetition. Already guarded against n == 0. */
+				retFsm->repeatOp( lowerRep );
+				afterOpMinimize( retFsm );
+			}
+			else {
+				/* This is the case that 0 < lowerRep < upperRep. Take a
+				 * duplicate for the optional repeat. */
+				FsmAp *dup = new FsmAp( *retFsm );
+
+				/* Do repetition on the first half. */
+				retFsm->repeatOp( lowerRep );
+				afterOpMinimize( retFsm );
+
+				/* Do optional repetition on the second half. */
+				dup->optionalRepeatOp( upperRep - lowerRep );
+				afterOpMinimize( dup );
+
+				/* Tack on the duplicate machine. */
+				retFsm->concatOp( dup );
+				afterOpMinimize( retFsm );
+			}
+		}
+		break;
+	}
+	case FactorWithNegType: {
+		/* Evaluate the Factor. Pass it up. */
+		retFsm = factorWithNeg->walk( pd );
+		break;
+	}
+	}
+	return retFsm;
+}
+
+void FactorWithRep::makeNameTree( ParseData *pd )
+{
+	/* Repetition adds no names of its own; forward to whichever child this
+	 * node type carries. */
+	switch ( type ) {
+	case FactorWithNegType:
+		factorWithNeg->makeNameTree( pd );
+		break;
+	case StarType: case StarStarType: case OptionalType: case PlusType:
+	case ExactType: case MaxType: case MinType: case RangeType:
+		factorWithRep->makeNameTree( pd );
+		break;
+	}
+}
+
+void FactorWithRep::resolveNameRefs( ParseData *pd )
+{
+	/* Nothing to resolve at this level; forward to whichever child this
+	 * node type carries. */
+	switch ( type ) {
+	case FactorWithNegType:
+		factorWithNeg->resolveNameRefs( pd );
+		break;
+	case StarType: case StarStarType: case OptionalType: case PlusType:
+	case ExactType: case MaxType: case MinType: case RangeType:
+		factorWithRep->resolveNameRefs( pd );
+		break;
+	}
+}
+
+/* Clean up after a factor with negation node. */
+FactorWithNeg::~FactorWithNeg()
+{
+	/* Delete the child that corresponds to this node's type: a plain
+	 * factor, or a nested factorWithNeg for either negation form. */
+	switch ( type ) {
+	case FactorType:
+		delete factor;
+		break;
+	case NegateType: case CharNegateType:
+		delete factorWithNeg;
+		break;
+	}
+}
+
+/* Evaluate a factor with negation node. */
+FsmAp *FactorWithNeg::walk( ParseData *pd )
+{
+	/* Builds the machine for this node. A plain factor is passed straight
+	 * up. Both negation forms subtract the operand machine from a base
+	 * machine: dot-star for NegateType, dot for CharNegateType. */
+	switch ( type ) {
+	case FactorType:
+		return factor->walk( pd );
+
+	case NegateType:
+	case CharNegateType: {
+		/* The operand is evaluated before the base machine is built. */
+		FsmAp *operand = factorWithNeg->walk( pd );
+
+		FsmAp *base = 0;
+		if ( type == NegateType )
+			base = dotStarFsm( pd );
+		else
+			base = dotFsm( pd );
+
+		base->subtractOp( operand );
+		afterOpMinimize( base );
+		return base;
+	}
+	}
+
+	/* Unknown node type: no machine. */
+	return 0;
+}
+
+void FactorWithNeg::makeNameTree( ParseData *pd )
+{
+	/* Negation adds no names of its own; forward to the child in use. */
+	switch ( type ) {
+	case FactorType:
+		factor->makeNameTree( pd );
+		break;
+	case NegateType: case CharNegateType:
+		factorWithNeg->makeNameTree( pd );
+		break;
+	}
+}
+
+void FactorWithNeg::resolveNameRefs( ParseData *pd )
+{
+	/* Nothing to resolve at this level; forward to the child in use. */
+	switch ( type ) {
+	case FactorType:
+		factor->resolveNameRefs( pd );
+		break;
+	case NegateType: case CharNegateType:
+		factorWithNeg->resolveNameRefs( pd );
+		break;
+	}
+}
+
+/* Clean up after a factor node. */
+Factor::~Factor()
+{
+	/* Delete the child that corresponds to this node's type. */
+	switch ( type ) {
+	case ReferenceType: {
+		/* Nothing is deleted for a reference. */
+		break;
+	}
+	case LiteralType: {
+		delete literal;
+		break;
+	}
+	case RangeType: {
+		delete range;
+		break;
+	}
+	case OrExprType: {
+		delete reItem;
+		break;
+	}
+	case RegExprType: {
+		delete regExp;
+		break;
+	}
+	case ParenType: {
+		delete join;
+		break;
+	}
+	case LongestMatchType: {
+		delete longestMatch;
+		break;
+	}
+	}
+}
+
+/* Evaluate a factor node. */
+FsmAp *Factor::walk( ParseData *pd )
+{
+	/* Build the machine by delegating to the child selected by the node
+	 * type. The or-expression and regular-expression children are walked
+	 * with a null root regex, marking them as the root of their own walk. */
+	switch ( type ) {
+	case LiteralType:
+		return literal->walk( pd );
+	case RangeType:
+		return range->walk( pd );
+	case OrExprType:
+		return reItem->walk( pd, 0 );
+	case RegExprType:
+		return regExp->walk( pd, 0 );
+	case ReferenceType:
+		return varDef->walk( pd );
+	case ParenType:
+		return join->walk( pd );
+	case LongestMatchType:
+		return longestMatch->walk( pd );
+	}
+
+	/* Unknown node type: no machine. */
+	return 0;
+}
+
+void Factor::makeNameTree( ParseData *pd )
+{
+	/* Only references, parenthesised joins and longest-match blocks are
+	 * descended into. Literals, ranges, or-expressions and regular
+	 * expressions do nothing here. */
+	switch ( type ) {
+	case ReferenceType:
+		varDef->makeNameTree( loc, pd );
+		break;
+	case ParenType:
+		join->makeNameTree( pd );
+		break;
+	case LongestMatchType:
+		longestMatch->makeNameTree( pd );
+		break;
+	default:
+		break;
+	}
+}
+
+void Factor::resolveNameRefs( ParseData *pd )
+{
+	/* Only references, parenthesised joins and longest-match blocks are
+	 * descended into. Literals, ranges, or-expressions and regular
+	 * expressions do nothing here. */
+	switch ( type ) {
+	case ReferenceType:
+		varDef->resolveNameRefs( pd );
+		break;
+	case ParenType:
+		join->resolveNameRefs( pd );
+		break;
+	case LongestMatchType:
+		longestMatch->resolveNameRefs( pd );
+		break;
+	default:
+		break;
+	}
+}
+
+/* Clean up a range object. Must delete the two literals. */
+Range::~Range()
+{
+ /* Delete the literals for the lower and upper end of the range. */
+ delete lowerLit;
+ delete upperLit;
+}
+
+bool Range::verifyRangeFsm( FsmAp *rangeEnd )
+{
+	/* Returns true only if the machine has this shape: exactly two states,
+	 * a non-final start state, exactly one final state with no transitions
+	 * out, and exactly one transition out of the start state. The tests
+	 * run in this order and stop at the first failure. */
+	const bool shapeOk =
+			rangeEnd->stateList.length() == 2 &&
+			!rangeEnd->startState->isFinState() &&
+			rangeEnd->finStateSet.length() == 1 &&
+			rangeEnd->finStateSet[0]->outList.length() == 0 &&
+			rangeEnd->startState->outList.length() == 1;
+	if ( !shapeOk )
+		return false;
+
+	/* The single transition out of the start state must cover one key
+	 * only, i.e. it must not itself be a range. */
+	TransAp *onlyTrans = rangeEnd->startState->outList.head;
+	return !( onlyTrans->lowKey != onlyTrans->highKey );
+}
+
+/* Evaluate a range. Gets the lower an upper key and makes an fsm range. */
+FsmAp *Range::walk( ParseData *pd )
+{
+	/* Builds a machine matching any single key between the keys of the two
+	 * literals. Both literals are evaluated; each must pass
+	 * verifyRangeFsm(), otherwise an error is reported. If the lower key is
+	 * greater than the upper key an error is reported and the upper key is
+	 * set to the lower one. Returns a newly allocated machine. */
+
+	/* Construct and verify the suitability of the lower end of the range. */
+	FsmAp *lowerFsm = lowerLit->walk( pd );
+	if ( !verifyRangeFsm( lowerFsm ) ) {
+		error(lowerLit->token.loc) <<
+				"bad range lower end, must be a single character" << endl;
+	}
+
+	/* Construct and verify the upper end. */
+	FsmAp *upperFsm = upperLit->walk( pd );
+	if ( !verifyRangeFsm( upperFsm ) ) {
+		error(upperLit->token.loc) <<
+				"bad range upper end, must be a single character" << endl;
+	}
+
+	/* The keys live on the first transition out of each start state. A
+	 * machine that failed verification may have no such transition, so
+	 * check before dereferencing. Verification requires exactly one
+	 * transition there, so the error above has already been reported;
+	 * recover with the null machine. */
+	TransAp *lowTrans = lowerFsm->startState->outList.head;
+	TransAp *highTrans = upperFsm->startState->outList.head;
+	if ( lowTrans == 0 || highTrans == 0 ) {
+		delete lowerFsm;
+		delete upperFsm;
+
+		FsmAp *nullFsm = new FsmAp();
+		nullFsm->lambdaFsm();
+		return nullFsm;
+	}
+
+	/* Grab the keys from the machines, then delete them. */
+	Key lowKey = lowTrans->lowKey;
+	Key highKey = highTrans->lowKey;
+	delete lowerFsm;
+	delete upperFsm;
+
+	/* Validate the range. */
+	if ( lowKey > highKey ) {
+		/* Recover by setting upper to lower; */
+		error(lowerLit->token.loc) << "lower end of range is greater then upper end" << endl;
+		highKey = lowKey;
+	}
+
+	/* Return the range now that it is validated. */
+	FsmAp *retFsm = new FsmAp();
+	retFsm->rangeFsm( lowKey, highKey );
+	return retFsm;
+}
+
+/* Evaluate a literal object. */
+FsmAp *Literal::walk( ParseData *pd )
+{
+	/* Builds the machine for a literal: a single key for a number, a
+	 * concatenation of keys for a string. */
+	switch ( type ) {
+	case Number: {
+		/* Convert the token text to a key, then make a machine from it. */
+		Key numKey = makeFsmKeyNum( token.data, token.loc, pd );
+		FsmAp *numFsm = new FsmAp();
+		numFsm->concatFsm( numKey );
+		return numFsm;
+	}
+	case LitString: {
+		/* prepareLitString() fills in the interpreted text and reports
+		 * whether the string is case-insensitive. */
+		Token interp;
+		bool caseInsensitive;
+		token.prepareLitString( interp, caseInsensitive );
+
+		/* Turn the interpreted text into an array of keys. */
+		Key *keys = new Key[interp.length];
+		makeFsmKeyArray( keys, interp.data, interp.length, pd );
+
+		/* Concatenate the keys, with or without case folding. */
+		FsmAp *strFsm = new FsmAp();
+		if ( caseInsensitive )
+			strFsm->concatFsmCI( keys, interp.length );
+		else
+			strFsm->concatFsm( keys, interp.length );
+
+		/* Neither temporary buffer is needed once the machine is built. */
+		delete[] interp.data;
+		delete[] keys;
+		return strFsm;
+	}
+	}
+
+	/* Unknown literal type: no machine. */
+	return 0;
+}
+
+/* Clean up after a regular expression object. */
+RegExpr::~RegExpr()
+{
+	/* A recursive node deletes the preceding expression and its own item;
+	 * the empty node deletes nothing. */
+	switch ( type ) {
+	case Empty: {
+		break;
+	}
+	case RecurseItem: {
+		delete regExp;
+		delete item;
+		break;
+	}
+	}
+}
+
+/* Evaluate a regular expression object. */
+FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex )
+{
+	/* Builds the machine for a regular expression by concatenating its
+	 * items left to right.
+	 *
+	 * pd:        parse data passed through to the items.
+	 * rootRegex: the outermost expression, or 0 when this call IS the
+	 *            outermost one.
+	 *
+	 * Recursive calls return 0 for the empty expression so that the first
+	 * item becomes the initial machine. The outermost call never returns 0:
+	 * an entirely empty expression yields the lambda machine, so callers
+	 * that dereference the result do not crash. */
+
+	/* This is the root regex, pass down a pointer to this. */
+	const bool isRoot = ( rootRegex == 0 );
+	if ( isRoot )
+		rootRegex = this;
+
+	FsmAp *rtnVal = 0;
+	switch ( type ) {
+	case RecurseItem: {
+		/* Walk both items. */
+		FsmAp *fsm1 = regExp->walk( pd, rootRegex );
+		FsmAp *fsm2 = item->walk( pd, rootRegex );
+		if ( fsm1 == 0 )
+			rtnVal = fsm2;
+		else {
+			fsm1->concatOp( fsm2 );
+			rtnVal = fsm1;
+		}
+		break;
+	}
+	case Empty: {
+		rtnVal = 0;
+		break;
+	}
+	}
+
+	/* An empty expression at the root matches the zero-length word. */
+	if ( isRoot && rtnVal == 0 ) {
+		rtnVal = new FsmAp();
+		rtnVal->lambdaFsm();
+	}
+	return rtnVal;
+}
+
+/* Clean up after an item in a regular expression. */
+ReItem::~ReItem()
+{
+	/* Only the two or-block forms delete a child; data and dot items
+	 * delete nothing. */
+	switch ( type ) {
+	case OrBlock: case NegOrBlock:
+		delete orBlock;
+		break;
+	case Data: case Dot:
+		break;
+	}
+}
+
+/* Evaluate a regular expression object. */
+FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex )
+{
+	/* Builds the machine for one item of a regular expression: literal
+	 * data, the dot, an or-block or a negated or-block, optionally followed
+	 * by a kleene star.
+	 *
+	 * pd:        parse data passed to the key and machine builders.
+	 * rootRegex: the enclosing root expression, consulted for its
+	 *            caseInsensitive flag; may be 0.
+	 *
+	 * An or-block with no items walks to 0. That is handled here rather
+	 * than dereferenced: an empty or-block yields the lambda machine, and an
+	 * empty negated or-block yields the plain dot machine, since nothing is
+	 * subtracted. */
+	FsmAp *rtnVal = 0;
+
+	switch ( type ) {
+	case Data: {
+		/* Move the data into an integer array and make a concat fsm. */
+		Key *arr = new Key[token.length];
+		makeFsmKeyArray( arr, token.data, token.length, pd );
+
+		/* Make the concat fsm. */
+		rtnVal = new FsmAp();
+		if ( rootRegex != 0 && rootRegex->caseInsensitive )
+			rtnVal->concatFsmCI( arr, token.length );
+		else
+			rtnVal->concatFsm( arr, token.length );
+		delete[] arr;
+		break;
+	}
+	case Dot: {
+		/* Make the dot fsm. */
+		rtnVal = dotFsm( pd );
+		break;
+	}
+	case OrBlock: {
+		/* Get the or block and minimize it. */
+		rtnVal = orBlock->walk( pd, rootRegex );
+		if ( rtnVal == 0 ) {
+			/* Empty or-block: fall back to the null machine. */
+			rtnVal = new FsmAp();
+			rtnVal->lambdaFsm();
+		}
+		rtnVal->minimizePartition2();
+		break;
+	}
+	case NegOrBlock: {
+		/* Get the or block and minimize it. */
+		FsmAp *fsm = orBlock->walk( pd, rootRegex );
+		if ( fsm != 0 )
+			fsm->minimizePartition2();
+
+		/* Make a dot fsm and subtract from it. With an empty or-block
+		 * there is nothing to subtract. */
+		rtnVal = dotFsm( pd );
+		if ( fsm != 0 ) {
+			rtnVal->subtractOp( fsm );
+			rtnVal->minimizePartition2();
+		}
+		break;
+	}
+	}
+
+	/* If the item is followed by a star, then apply the star op. */
+	if ( star ) {
+		if ( rtnVal->startState->isFinState() ) {
+			warning(loc) << "applying kleene star to a machine that "
+					"accepts zero length word" << endl;
+		}
+
+		rtnVal->starOp();
+		rtnVal->minimizePartition2();
+	}
+	return rtnVal;
+}
+
+/* Clean up after an or block of a regular expression. */
+ReOrBlock::~ReOrBlock()
+{
+	/* A recursive node deletes the preceding block and its own item; the
+	 * empty node deletes nothing. */
+	switch ( type ) {
+	case Empty: {
+		break;
+	}
+	case RecurseItem: {
+		delete orBlock;
+		delete item;
+		break;
+	}
+	}
+}
+
+
+/* Evaluate an or block of a regular expression. */
+FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex )
+{
+	/* Builds the union of all items in the block. The empty block yields
+	 * no machine (0), so the first item becomes the initial machine. */
+	switch ( type ) {
+	case Empty:
+		return 0;
+
+	case RecurseItem: {
+		/* Evaluate the preceding block first, then this node's item. */
+		FsmAp *soFar = orBlock->walk( pd, rootRegex );
+		FsmAp *itemFsm = item->walk( pd, rootRegex );
+
+		/* Nothing before this item: it stands alone. */
+		if ( soFar == 0 )
+			return itemFsm;
+
+		soFar->unionOp( itemFsm );
+		return soFar;
+	}
+	}
+
+	/* Unknown node type: no machine. */
+	return 0;
+}
+
+/* Evaluate an or block item of a regular expression. */
+FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex )
+{
+	/* Builds the machine for one item of an or-block: either a set of
+	 * individual characters (Data) or a character range (Range).
+	 *
+	 * pd:        parse data passed to the key builders.
+	 * rootRegex: the enclosing root expression, consulted for its
+	 *            caseInsensitive flag; may be 0.
+	 *
+	 * For a case-insensitive range, the part overlapping 'A'..'Z' is
+	 * mirrored into lower case AND the part overlapping 'a'..'z' is mirrored
+	 * into upper case. The two overlaps are tested independently, because a
+	 * range such as X-c touches both. */
+	FsmAp *rtnVal = 0;
+	switch ( type ) {
+	case Data: {
+		/* Make the or machine. */
+		rtnVal = new FsmAp();
+
+		/* Put the or data into an array of ints. Note that we find unique
+		 * keys. Duplicates are silently ignored. The alternative would be to
+		 * issue warning or an error but since we can't with [a0-9a] or 'a' |
+		 * 'a' don't bother here. */
+		KeySet keySet;
+		makeFsmUniqueKeyArray( keySet, token.data, token.length,
+				rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
+
+		/* Run the or operator. */
+		rtnVal->orFsm( keySet.data, keySet.length() );
+		break;
+	}
+	case Range: {
+		/* Make the upper and lower keys. */
+		Key lowKey = makeFsmKeyChar( lower, pd );
+		Key highKey = makeFsmKeyChar( upper, pd );
+
+		/* Validate the range. */
+		if ( lowKey > highKey ) {
+			/* Recover by setting upper to lower; */
+			error(loc) << "lower end of range is greater then upper end" << endl;
+			highKey = lowKey;
+		}
+
+		/* Make the range machine. */
+		rtnVal = new FsmAp();
+		rtnVal->rangeFsm( lowKey, highKey );
+
+		if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
+			/* Upper-case portion of the range: add its lower-case twin. */
+			if ( lowKey <= 'Z' && 'A' <= highKey ) {
+				Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
+				Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
+
+				otherLow = 'a' + ( otherLow - 'A' );
+				otherHigh = 'a' + ( otherHigh - 'A' );
+
+				FsmAp *otherRange = new FsmAp();
+				otherRange->rangeFsm( otherLow, otherHigh );
+				rtnVal->unionOp( otherRange );
+				rtnVal->minimizePartition2();
+			}
+
+			/* Lower-case portion of the range: add its upper-case twin.
+			 * Deliberately not an else-branch of the test above. */
+			if ( lowKey <= 'z' && 'a' <= highKey ) {
+				Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
+				Key otherHigh = 'z' < highKey ? Key('z') : highKey;
+
+				otherLow = 'A' + ( otherLow - 'a' );
+				otherHigh = 'A' + ( otherHigh - 'a' );
+
+				FsmAp *otherRange = new FsmAp();
+				otherRange->rangeFsm( otherLow, otherHigh );
+				rtnVal->unionOp( otherRange );
+				rtnVal->minimizePartition2();
+			}
+		}
+
+		break;
+	}
+	}
+	return rtnVal;
+}
diff --git a/ragel/parsetree.h b/ragel/parsetree.h
new file mode 100644
index 0000000..c340171
--- /dev/null
+++ b/ragel/parsetree.h
@@ -0,0 +1,761 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _PARSETREE_H
+#define _PARSETREE_H
+
+#include "avlmap.h"
+#include "bstmap.h"
+#include "vector.h"
+#include "dlist.h"
+
+struct NameInst;
+
+/* Types of builtin machines. */
+enum BuiltinMachine
+{
+ BT_Any,
+ BT_Ascii,
+ BT_Extend,
+ BT_Alpha,
+ BT_Digit,
+ BT_Alnum,
+ BT_Lower,
+ BT_Upper,
+ BT_Cntrl,
+ BT_Graph,
+ BT_Print,
+ BT_Punct,
+ BT_Space,
+ BT_Xdigit,
+ BT_Lambda,
+ BT_Empty
+};
+
+/* Location in an input file. */
+struct InputLoc
+{
+ char *fileName;
+ int line;
+ int col;
+};
+
+struct ParseData;
+
+/* Leaf type. */
+struct Literal;
+
+/* Tree nodes. */
+
+struct Term;
+struct FactorWithAug;
+struct FactorWithRep;
+struct FactorWithNeg;
+struct Factor;
+struct Expression;
+struct Join;
+struct JoinOrLm;
+struct LongestMatch;
+struct LongestMatchPart;
+struct LmPartList;
+struct Range;
+
+/* Type of augmentation. Describes locations in the machine. */
+enum AugType
+{
+ /* Transition actions/priorities. */
+ at_start,
+ at_all,
+ at_finish,
+ at_leave,
+
+ /* Global error actions. */
+ at_start_gbl_error,
+ at_all_gbl_error,
+ at_final_gbl_error,
+ at_not_start_gbl_error,
+ at_not_final_gbl_error,
+ at_middle_gbl_error,
+
+ /* Local error actions. */
+ at_start_local_error,
+ at_all_local_error,
+ at_final_local_error,
+ at_not_start_local_error,
+ at_not_final_local_error,
+ at_middle_local_error,
+
+ /* To State Action embedding. */
+ at_start_to_state,
+ at_all_to_state,
+ at_final_to_state,
+ at_not_start_to_state,
+ at_not_final_to_state,
+ at_middle_to_state,
+
+ /* From State Action embedding. */
+ at_start_from_state,
+ at_all_from_state,
+ at_final_from_state,
+ at_not_start_from_state,
+ at_not_final_from_state,
+ at_middle_from_state,
+
+ /* EOF Action embedding. */
+ at_start_eof,
+ at_all_eof,
+ at_final_eof,
+ at_not_start_eof,
+ at_not_final_eof,
+ at_middle_eof
+};
+
+/* IMPORTANT: These must follow the same order as the state augs in AugType
+ * since we will be using this to compose AugType. */
+enum StateAugType
+{
+ sat_start = 0,
+ sat_all,
+ sat_final,
+ sat_not_start,
+ sat_not_final,
+ sat_middle
+};
+
+struct Action;
+struct PriorDesc;
+struct RegExpr;
+struct ReItem;
+struct ReOrBlock;
+struct ReOrItem;
+struct ExplicitMachine;
+struct InlineItem;
+struct InlineList;
+
+/* Reference to a named state. */
+typedef Vector<char*> NameRef;
+typedef Vector<NameRef*> NameRefList;
+typedef Vector<NameInst*> NameTargList;
+
+/* Structure for storing location of epsilon transitions. */
+struct EpsilonLink
+{
+ EpsilonLink( const InputLoc &loc, NameRef &target )
+ : loc(loc), target(target) { }
+
+ InputLoc loc;
+ NameRef target;
+};
+
+struct Label
+{
+ Label( const InputLoc &loc, char *data )
+ : loc(loc), data(data) { }
+
+ InputLoc loc;
+ char *data;
+};
+
+/* Structure represents an action assigned to some FactorWithAug node. The
+ * factor with aug will keep an array of these. */
+struct ParserAction
+{
+ ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action )
+ : loc(loc), type(type), localErrKey(localErrKey), action(action) { }
+
+ InputLoc loc;
+ AugType type;
+ int localErrKey;
+ Action *action;
+};
+
+struct Token
+{
+ char *data;
+ int length;
+ InputLoc loc;
+
+ void prepareLitString( Token &result, bool &caseInsensitive );
+ void append( const Token &other );
+ void set( char *str, int len );
+};
+
+/* Store the value and type of a priority augmentation. */
+struct PriorityAug
+{
+ PriorityAug( AugType type, int priorKey, int priorValue ) :
+ type(type), priorKey(priorKey), priorValue(priorValue) { }
+
+ AugType type;
+ int priorKey;
+ int priorValue;
+};
+
+/*
+ * A Variable Definition
+ */
+struct VarDef
+{
+ VarDef( char *name, JoinOrLm *joinOrLm )
+ : name(name), joinOrLm(joinOrLm) { }
+
+ /* Parse tree traversal. */
+ FsmAp *walk( ParseData *pd );
+ void makeNameTree( const InputLoc &loc, ParseData *pd );
+ void resolveNameRefs( ParseData *pd );
+
+ char *name;
+ JoinOrLm *joinOrLm;
+};
+
+
+/*
+ * LongestMatch
+ *
+ * Wherever possible the item match will execute on the character. If not
+ * possible the item match will execute on a lookahead character and either
+ * hold the current char (if one away) or backup.
+ *
+ * How to handle the problem of backing up over a buffer break?
+ *
+ * Don't want to use pending out transitions for embedding item match because
+ * the role of item match action is different: it may sometimes match on the
+ * final transition, or may match on a lookahead character.
+ *
+ * Don't want to invent a new operator just for this. So just trail action
+ * after machine, this means we can only use literal actions.
+ *
+ * The item action may
+ *
+ * What states of the machine will be final. The item actions that wrap around
+ * on the last character will go straight to the start state.
+ *
+ * Some transitions will be lookahead transitions, they will hold the current
+ * character. Crossing them with regular transitions must be restricted
+ * because it does not make sense. The transition cannot simultaneously hold
+ * and consume the current character.
+ */
+struct LongestMatchPart
+{
+ LongestMatchPart( Join *join, Action *action,
+ InputLoc &semiLoc, int longestMatchId )
+ :
+ join(join), action(action), semiLoc(semiLoc),
+ longestMatchId(longestMatchId), inLmSelect(false) { }
+
+ InputLoc getLoc();
+
+ Join *join;
+ Action *action;
+ InputLoc semiLoc;
+
+ Action *setActId;
+ Action *actOnLast;
+ Action *actOnNext;
+ Action *actLagBehind;
+ int longestMatchId;
+ bool inLmSelect;
+ LongestMatch *longestMatch;
+
+ LongestMatchPart *prev, *next;
+};
+
+/* Declare a new type so that ptreetypes.h need not include dlist.h. */
+struct LmPartList : DList<LongestMatchPart> {};
+
+struct LongestMatch
+{
+ /* Construct with a list of joins */
+ LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) :
+ loc(loc), longestMatchList(longestMatchList), name(0),
+ lmSwitchHandlesError(false) { }
+
+ /* Tree traversal. */
+ FsmAp *walk( ParseData *pd );
+ void makeNameTree( ParseData *pd );
+ void resolveNameRefs( ParseData *pd );
+ void runLonestMatch( ParseData *pd, FsmAp *graph );
+ Action *newAction( ParseData *pd, const InputLoc &loc, char *name,
+ InlineList *inlineList );
+ void makeActions( ParseData *pd );
+ void findName( ParseData *pd );
+ void restart( FsmAp *graph, TransAp *trans );
+
+ InputLoc loc;
+ LmPartList *longestMatchList;
+ char *name;
+
+ Action *lmActSelect;
+ bool lmSwitchHandlesError;
+
+ LongestMatch *next, *prev;
+};
+
+
+/* List of Expressions. */
+typedef DList<Expression> ExprList;
+
+struct JoinOrLm	/* Holds either a Join or a LongestMatch; type says which. */
+{
+	enum Type {
+		JoinType,
+		LongestMatchType
+	};
+
+	JoinOrLm( Join *join ) :	/* The unused pointer is zeroed, not left indeterminate. */
+		join(join), longestMatch(0), type(JoinType) {}
+	JoinOrLm( LongestMatch *longestMatch ) :
+		join(0), longestMatch(longestMatch), type(LongestMatchType) {}
+
+	FsmAp *walk( ParseData *pd );
+	void makeNameTree( ParseData *pd );
+	void resolveNameRefs( ParseData *pd );
+
+	Join *join;	/* Set by the Join constructor, otherwise 0. */
+	LongestMatch *longestMatch;	/* Set by the LongestMatch constructor, otherwise 0. */
+	Type type;	/* Records which constructor built this node. */
+};
+
+/*
+ * Join
+ */
+struct Join
+{
+ /* Construct with the first expression. */
+ Join( Expression *expr );
+ Join( const InputLoc &loc, Expression *expr );
+
+ /* Tree traversal. */
+ FsmAp *walk( ParseData *pd );
+ FsmAp *walkJoin( ParseData *pd );
+ void makeNameTree( ParseData *pd );
+ void resolveNameRefs( ParseData *pd );
+
+ /* Data. */
+ InputLoc loc;
+ ExprList exprList;
+};
+
+/*
+ * Expression: combines terms with the or, intersect and subtract operators.
+ */
+struct Expression
+{
+	enum Type {
+		OrType,
+		IntersectType,
+		SubtractType,
+		StrongSubtractType,
+		TermType,
+		BuiltinType
+	};
+
+	/* Expression on the left, term on the right. builtin gets a placeholder. */
+	Expression( Expression *expression, Term *term, Type type ) :
+		expression(expression), term(term),
+		builtin(BT_Any), type(type), prev(this), next(this) { }
+
+	/* Construct with only a term. builtin gets a placeholder (no self-init). */
+	Expression( Term *term ) :
+		expression(0), term(term), builtin(BT_Any),
+		type(TermType), prev(this), next(this) { }
+
+	/* Construct with a builtin type. */
+	Expression( BuiltinMachine builtin ) :
+		expression(0), term(0), builtin(builtin),
+		type(BuiltinType), prev(this), next(this) { }
+
+	~Expression();
+
+	/* Tree traversal. */
+	FsmAp *walk( ParseData *pd, bool lastInSeq = true );
+	void makeNameTree( ParseData *pd );
+	void resolveNameRefs( ParseData *pd );
+
+	/* Node data. builtin is presumably only read for BuiltinType nodes. */
+	Expression *expression;
+	Term *term;
+	BuiltinMachine builtin;
+	Type type;
+
+	Expression *prev, *next;
+};
+
+/*
+ * Term
+ */
+struct Term
+{
+ enum Type {
+ ConcatType,
+ RightStartType,
+ RightFinishType,
+ LeftType,
+ FactorWithAugType
+ };
+
+ Term( Term *term, FactorWithAug *factorWithAug ) :
+ term(term), factorWithAug(factorWithAug), type(ConcatType) { }
+
+ Term( Term *term, FactorWithAug *factorWithAug, Type type ) :
+ term(term), factorWithAug(factorWithAug), type(type) { }
+
+ Term( FactorWithAug *factorWithAug ) :
+ term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { }
+
+ ~Term();
+
+ FsmAp *walk( ParseData *pd, bool lastInSeq = true );
+ void makeNameTree( ParseData *pd );
+ void resolveNameRefs( ParseData *pd );
+
+ Term *term;
+ FactorWithAug *factorWithAug;
+ Type type;
+
+ /* Priority descriptor for RightFinish type. */
+ PriorDesc priorDescs[2];
+};
+
+
+/* Third level of precedence. Augmenting nodes with actions and priorities. */
+struct FactorWithAug
+{
+ FactorWithAug( FactorWithRep *factorWithRep ) :
+ priorDescs(0), factorWithRep(factorWithRep) { }
+ ~FactorWithAug();
+
+ /* Tree traversal. */
+ FsmAp *walk( ParseData *pd );
+ void makeNameTree( ParseData *pd );
+ void resolveNameRefs( ParseData *pd );
+
+ void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd );
+ void assignPriorities( FsmAp *graph, int *priorOrd );
+
+ void assignConditions( FsmAp *graph );
+
+ /* Actions and priorities assigned to the factor node. */
+ Vector<ParserAction> actions;
+ Vector<PriorityAug> priorityAugs;
+ PriorDesc *priorDescs;
+ Vector<Label> labels;
+ Vector<EpsilonLink> epsilonLinks;
+ Vector<ParserAction> conditions;
+
+ FactorWithRep *factorWithRep;
+};
+
+/* Fourth level of precedence. Trailing unary operators. Provide Kleene star,
+ * optional and plus. */
+struct FactorWithRep
+{
+	enum Type {
+		StarType,
+		StarStarType,
+		OptionalType,
+		PlusType,
+		ExactType,
+		MaxType,
+		MinType,
+		RangeType,
+		FactorWithNegType
+	};
+
+	FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep,
+			int lowerRep, int upperRep, Type type ) :
+		loc(loc), factorWithRep(factorWithRep), factorWithNeg(0),
+		lowerRep(lowerRep), upperRep(upperRep), type(type) { }
+
+	FactorWithRep( const InputLoc &loc, FactorWithNeg *factorWithNeg )
+		: loc(loc), factorWithRep(0), factorWithNeg(factorWithNeg),
+		lowerRep(0), upperRep(0), type(FactorWithNegType) { }
+
+	~FactorWithRep();
+
+	/* Tree traversal. */
+	FsmAp *walk( ParseData *pd );
+	void makeNameTree( ParseData *pd );
+	void resolveNameRefs( ParseData *pd );
+
+	InputLoc loc;
+	FactorWithRep *factorWithRep;	/* Zero when built from a FactorWithNeg. */
+	FactorWithNeg *factorWithNeg;	/* Zero when built from a FactorWithRep. */
+	int lowerRep, upperRep;	/* Zero when built from a FactorWithNeg. */
+	Type type;
+
+	/* Priority descriptor for StarStar type. */
+	PriorDesc priorDescs[2];
+};
+
+/* Fifth level of precedence. Provides Negation. */
+struct FactorWithNeg
+{
+ enum Type {
+ NegateType,
+ CharNegateType,
+ FactorType
+ };
+
+ FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) :
+ loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { }
+
+ FactorWithNeg( const InputLoc &loc, Factor *factor ) :
+ loc(loc), factorWithNeg(0), factor(factor), type(FactorType) { }
+
+ ~FactorWithNeg();
+
+ /* Tree traversal. */
+ FsmAp *walk( ParseData *pd );
+ void makeNameTree( ParseData *pd );
+ void resolveNameRefs( ParseData *pd );
+
+ InputLoc loc;
+ FactorWithNeg *factorWithNeg;
+ Factor *factor;
+ Type type;
+};
+
+/*
+ * Factor
+ */
+struct Factor
+{
+ /* Language elements a factor node can be. */
+ enum Type {
+ LiteralType,
+ RangeType,
+ OrExprType,
+ RegExprType,
+ ReferenceType,
+ ParenType,
+ LongestMatchType,
+ };
+
+ /* Construct with a literal fsm. */
+ Factor( Literal *literal ) :
+ literal(literal), type(LiteralType) { }
+
+ /* Construct with a range. */
+ Factor( Range *range ) :
+ range(range), type(RangeType) { }
+
+ /* Construct with the or part of a regular expression. */
+ Factor( ReItem *reItem ) :
+ reItem(reItem), type(OrExprType) { }
+
+ /* Construct with a regular expression. */
+ Factor( RegExpr *regExp ) :
+ regExp(regExp), type(RegExprType) { }
+
+ /* Construct with a reference to a var def. */
+ Factor( const InputLoc &loc, VarDef *varDef ) :
+ loc(loc), varDef(varDef), type(ReferenceType) {}
+
+ /* Construct with a parenthesized join. */
+ Factor( Join *join ) :
+ join(join), type(ParenType) {}
+
+ /* Construct with a longest match operator. */
+ Factor( LongestMatch *longestMatch ) :
+ longestMatch(longestMatch), type(LongestMatchType) {}
+
+ /* Cleanup. */
+ ~Factor();
+
+ /* Tree traversal. */
+ FsmAp *walk( ParseData *pd );
+ void makeNameTree( ParseData *pd );
+ void resolveNameRefs( ParseData *pd );
+
+ InputLoc loc;
+ Literal *literal;
+ Range *range;
+ ReItem *reItem;
+ RegExpr *regExp;
+ VarDef *varDef;
+ Join *join;
+ LongestMatch *longestMatch;
+ int lower, upper;
+ Type type;
+};
+
+/* A range machine. Only ever composed of two literals. */
+struct Range
+{
+ Range( Literal *lowerLit, Literal *upperLit )
+ : lowerLit(lowerLit), upperLit(upperLit) { }
+
+ ~Range();
+ FsmAp *walk( ParseData *pd );
+ bool verifyRangeFsm( FsmAp *rangeEnd );
+
+ Literal *lowerLit;
+ Literal *upperLit;
+};
+
+/* Some literal machine. Can be a number or literal string. */
+struct Literal
+{
+ enum LiteralType { Number, LitString };
+
+ Literal( const Token &token, LiteralType type )
+ : token(token), type(type) { }
+
+ FsmAp *walk( ParseData *pd );
+
+ Token token;
+ LiteralType type;
+};
+
+/* Regular expression. */
+struct RegExpr
+{
+ enum RegExpType { RecurseItem, Empty };
+
+ /* Constructors. */
+ RegExpr() :
+ type(Empty), caseInsensitive(false) { }
+ RegExpr(RegExpr *regExp, ReItem *item) :
+ regExp(regExp), item(item),
+ type(RecurseItem), caseInsensitive(false) { }
+
+ ~RegExpr();
+ FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
+
+ RegExpr *regExp;
+ ReItem *item;
+ RegExpType type;
+ bool caseInsensitive;
+};
+
+/* An item in a regular expression. */
+struct ReItem
+{
+ enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
+
+ ReItem( const InputLoc &loc, const Token &token )
+ : loc(loc), token(token), star(false), type(Data) { }
+ ReItem( const InputLoc &loc, ReItemType type )
+ : loc(loc), star(false), type(type) { }
+ ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
+ : loc(loc), orBlock(orBlock), star(false), type(type) { }
+
+ ~ReItem();
+ FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
+
+ InputLoc loc;
+ Token token;
+ ReOrBlock *orBlock;
+ bool star;
+ ReItemType type;
+};
+
+/* An or block item. */
+struct ReOrBlock
+{
+ enum ReOrBlockType { RecurseItem, Empty };
+
+ /* Constructors. */
+ ReOrBlock()
+ : type(Empty) { }
+ ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
+ : orBlock(orBlock), item(item), type(RecurseItem) { }
+
+ ~ReOrBlock();
+ FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
+
+ ReOrBlock *orBlock;
+ ReOrItem *item;
+ ReOrBlockType type;
+};
+
+/* An item in an or block. */
+struct ReOrItem
+{
+ enum ReOrItemType { Data, Range };
+
+ ReOrItem( const InputLoc &loc, const Token &token )
+ : loc(loc), token(token), type(Data) {}
+ ReOrItem( const InputLoc &loc, char lower, char upper )
+ : loc(loc), lower(lower), upper(upper), type(Range) { }
+
+ FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
+
+ InputLoc loc;
+ Token token;
+ char lower;
+ char upper;
+ ReOrItemType type;
+};
+
+
+/*
+ * Inline code tree
+ */
+struct InlineList;
+struct InlineItem
+{
+ enum Type
+ {
+ Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret, PChar,
+ Char, Hold, Curs, Targs, Entry, Exec, LmSwitch, LmSetActId,
+ LmSetTokEnd, LmOnLast, LmOnNext, LmOnLagBehind, LmInitAct,
+ LmInitTokStart, LmSetTokStart, Break
+ };
+
+ InlineItem( const InputLoc &loc, char *data, Type type ) :
+ loc(loc), data(data), nameRef(0), children(0), type(type) { }
+
+ InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) :
+ loc(loc), data(0), nameRef(nameRef), children(0), type(type) { }
+
+ InlineItem( const InputLoc &loc, LongestMatch *longestMatch,
+ LongestMatchPart *longestMatchPart, Type type ) : loc(loc), data(0),
+ nameRef(0), children(0), longestMatch(longestMatch),
+ longestMatchPart(longestMatchPart), type(type) { }
+
+ InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) :
+ loc(loc), data(0), nameRef(0), nameTarg(nameTarg), children(0),
+ type(type) { }
+
+ InlineItem( const InputLoc &loc, Type type ) :
+ loc(loc), data(0), nameRef(0), children(0), type(type) { }
+
+ InputLoc loc;
+ char *data;
+ NameRef *nameRef;
+ NameInst *nameTarg;
+ InlineList *children;
+ LongestMatch *longestMatch;
+ LongestMatchPart *longestMatchPart;
+ Type type;
+
+ InlineItem *prev, *next;
+};
+
+/* Normally this would be a typedef, but that would entail including DList from
+ * ptreetypes, which should be just typedef forwards. */
+struct InlineList : public DList<InlineItem> { };
+
+
+
+#endif /* _PARSETREE_H */
diff --git a/ragel/ragel.h b/ragel/ragel.h
new file mode 100644
index 0000000..58f8a88
--- /dev/null
+++ b/ragel/ragel.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2001-2003 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _RAGEL_H
+#define _RAGEL_H
+
+#include <stdio.h>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include "config.h"
+
+#define PROGNAME "ragel"
+
+/* To what degree are machine minimized. */
+enum MinimizeLevel {
+ MinimizeApprox,
+ MinimizeStable,
+ MinimizePartition1,
+ MinimizePartition2
+};
+
+enum MinimizeOpt {
+ MinimizeNone,
+ MinimizeEnd,
+ MinimizeMostOps,
+ MinimizeEveryOp
+};
+
+
+/* IO filenames and stream. */
+extern char *outputFileName;
+extern std::istream *inStream;
+extern std::ostream *outStream;
+
+/* Options. */
+extern MinimizeLevel minimizeLevel;
+extern MinimizeOpt minimizeOpt;
+extern char *machineSpec, *machineName;
+extern bool printStatistics;
+
+extern int gblErrorCount;
+extern char machineMain[];
+
+/* Error reporting. */
+struct InputLoc;
+std::ostream &error();
+std::ostream &error( const InputLoc &loc );
+std::ostream &warning( );
+std::ostream &warning( const InputLoc &loc );
+
+void scan( char *fileName, std::istream &input );
+void terminateAllParsers( );
+void checkMachines( );
+void writeMachines( std::ostream &out, std::string hostData, char *inputFileName );
+void xmlEscapeHost( std::ostream &out, char *data, int len );
+
+
+/* Size of the include stack. */
+#define INCLUDE_STACK_SIZE 32
+
+#endif /* _RAGEL_H */
diff --git a/ragel/rlparse.kh b/ragel/rlparse.kh
new file mode 100644
index 0000000..5d7b404
--- /dev/null
+++ b/ragel/rlparse.kh
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef RLPARSE_H
+#define RLPARSE_H
+
+#include <iostream>
+#include "avltree.h"
+#include "parsedata.h"
+
+extern char *lelNames[];
+
+struct LangEl;
+
+struct Parser
+{
+ %%{
+ parser Parser;
+
+ # These must be declared first and in this order. Ragel currently cannot
+ # import kelbt keywords for use in machines, so in the scanner
+ # rely on knowing the values that kelbt will assign to these.
+ token KW_Machine, KW_Include, KW_Write, TK_Word, TK_Literal;
+
+ token TK_Number, TK_Inline, TK_Reference, TK_ColonEquals, TK_EndSection;
+
+ # General tokens.
+ token TK_UInt, TK_Hex, TK_Word, TK_Literal, TK_BaseClause,
+ TK_DotDot, TK_ColonGt, TK_ColonGtGt, TK_LtColon, TK_Arrow,
+ TK_DoubleArrow, TK_StarStar, TK_ColonEquals, TK_NameSep, TK_BarStar,
+ TK_DashDash;
+
+ # Conditions.
+ token TK_StartCond, TK_AllCond, TK_LeavingCond;
+
+ token TK_Middle;
+
+ # Global error actions.
+ token TK_StartGblError, TK_AllGblError, TK_FinalGblError,
+ TK_NotFinalGblError, TK_NotStartGblError, TK_MiddleGblError;
+
+ # Local error actions.
+ token TK_StartLocalError, TK_AllLocalError, TK_FinalLocalError,
+ TK_NotFinalLocalError, TK_NotStartLocalError, TK_MiddleLocalError;
+
+ # EOF Action embedding.
+ token TK_StartEOF, TK_AllEOF, TK_FinalEOF, TK_NotFinalEOF, TK_NotStartEOF,
+ TK_MiddleEOF;
+
+ # To State Actions.
+ token TK_StartToState, TK_AllToState, TK_FinalToState, TK_NotFinalToState,
+ TK_NotStartToState, TK_MiddleToState;
+
+ # From State Actions.
+ token TK_StartFromState, TK_AllFromState, TK_FinalFromState,
+ TK_NotFinalFromState, TK_NotStartFromState, TK_MiddleFromState;
+
+ # Regular expression tokens.
+ token RE_Slash, RE_SqOpen, RE_SqOpenNeg, RE_SqClose, RE_Dot, RE_Star,
+ RE_Dash, RE_Char;
+
+ # Tokens specific to inline code.
+ token IL_WhiteSpace, IL_Comment, IL_Literal, IL_Symbol;
+
+ # Keywords.
+ token KW_Action, KW_AlphType, KW_Range, KW_GetKey, KW_Include, KW_Write,
+ KW_Machine, KW_When, KW_Eof, KW_Err, KW_Lerr, KW_To, KW_From;
+
+ # Specials in code blocks.
+ token KW_Break, KW_Exec, KW_Hold, KW_PChar, KW_Char, KW_Goto, KW_Call,
+ KW_Ret, KW_CurState, KW_TargState, KW_Entry, KW_Next, KW_Exec,
+ KW_Variable, KW_Access;
+
+ # Special token for terminating semi-terminated code blocks. Needed because
+ # semi is sent as a token in the code block rather than as a generic
+ # symbol.
+ token TK_Semi;
+
+ interface;
+ }%%
+
+ Parser( char *fileName, char *sectionName, InputLoc &sectionLoc )
+ : sectionName(sectionName)
+ {
+ pd = new ParseData( fileName, sectionName, sectionLoc );
+ }
+
+ int token( InputLoc &loc, int tokId, char *tokstart, int toklen );
+ void tryMachineDef( InputLoc &loc, char *name,
+ JoinOrLm *joinOrLm, bool isInstance );
+
+ /* Report an error encountered by the parser. */
+ ostream &parser_error( int tokId, Token &token );
+
+ ParseData *pd;
+
+ /* The name of the root section, this does not change during an include. */
+ char *sectionName;
+
+ NameRef nameRef;
+ NameRefList nameRefList;
+};
+
+#endif
diff --git a/ragel/rlparse.kl b/ragel/rlparse.kl
new file mode 100644
index 0000000..b39fa5c
--- /dev/null
+++ b/ragel/rlparse.kl
@@ -0,0 +1,1402 @@
+/*
+ * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlparse.h"
+#include "ragel.h"
+#include <iostream>
+
+using std::cout;
+using std::cerr;
+using std::endl;
+
+ParserDict parserDict;
+
+%%{
+
+parser Parser;
+
+include "rlparse.kh";
+
+start: statement_list;
+
+statement_list: statement_list statement;
+statement_list: ;
+
+statement: assignment commit;
+statement: instantiation commit;
+statement: action_spec commit;
+statement: alphtype_spec commit;
+statement: range_spec commit;
+statement: getkey_spec commit;
+statement: access_spec commit;
+statement: variable_spec commit;
+
+# We use end section tokens to draw firm boundaries between sections.
+statement: TK_EndSection;
+
+assignment:
+ machine_name '=' join ';' final {
+ /* Main machine must be an instance. */
+ bool isInstance = false;
+ if ( strcmp($1->token.data, machineMain) == 0 ) {
+ warning($1->token.loc) <<
+ "main machine will be implicitly instantiated" << endl;
+ isInstance = true;
+ }
+
+ /* Generic creation of machine for instantiation and assignment. */
+ JoinOrLm *joinOrLm = new JoinOrLm( $3->join );
+ tryMachineDef( $1->token.loc, $1->token.data, joinOrLm, isInstance );
+ };
+
+instantiation:
+ machine_name TK_ColonEquals join_or_lm ';' final {
+ /* Generic creation of machine for instantiation and assignment. */
+ tryMachineDef( $1->token.loc, $1->token.data, $3->joinOrLm, true );
+ };
+
+type token_type
+{
+ Token token;
+};
+
+nonterm machine_name uses token_type;
+
+machine_name:
+ TK_Word final {
+ //cerr << "parser: machine name" << endl;
+
+ /* Make/get the priority key. The name may have already been referenced
+ * and therefore exist. */
+ PriorDictEl *priorDictEl;
+ if ( pd->priorDict.insert( $1->data, pd->nextPriorKey, &priorDictEl ) )
+ pd->nextPriorKey += 1;
+ pd->curDefPriorKey = priorDictEl->value;
+
+ /* Make/get the local error key. */
+ LocalErrDictEl *localErrDictEl;
+ if ( pd->localErrDict.insert( $1->data, pd->nextLocalErrKey, &localErrDictEl ) )
+ pd->nextLocalErrKey += 1;
+ pd->curDefLocalErrKey = localErrDictEl->value;
+
+ $$->token = *$1;
+ };
+
+action_spec:
+ KW_Action TK_Word '{' inline_block '}' final {
+ if ( pd->actionDict.find( $2->data ) ) {
+ /* Recover by just ignoring the duplicate. */
+ error($2->loc) << "action \"" << $2->data << "\" already defined" << endl;
+ }
+ else {
+ //cerr << "NEW ACTION " << $2->data << " " << $4->inlineList << endl;
+ /* Add the action to the list of actions. */
+ Action *newAction = new Action( $3->loc, $2->data, $4->inlineList );
+
+ /* Insert to list and dict. */
+ pd->actionList.append( newAction );
+ pd->actionDict.insert( newAction );
+ }
+ };
+
+# Specifies the data type of the input alphabet. One or two words followed by a
+# semi-colon.
+alphtype_spec:
+ KW_AlphType TK_Word TK_Word ';' final {
+ if ( ! pd->setAlphType( $2->data, $3->data ) ) {
+ // Recover by ignoring the alphtype statement.
+ error($2->loc) << "\"" << $2->data <<
+ " " << $3->data << "\" is not a valid alphabet type" << endl;
+ }
+ };
+
+alphtype_spec:
+ KW_AlphType TK_Word ';' final {
+ if ( ! pd->setAlphType( $2->data ) ) {
+ // Recover by ignoring the alphtype statement.
+ error($2->loc) << "\"" << $2->data <<
+ "\" is not a valid alphabet type" << endl;
+ }
+ };
+
+# Specifies a range to assume that the input characters will fall into.
+range_spec:
+ KW_Range alphabet_num alphabet_num ';' final {
+ // Save the upper and lower ends of the range and emit the line number.
+ pd->lowerNum = $2->token.data;
+ pd->upperNum = $3->token.data;
+ pd->rangeLowLoc = $2->token.loc;
+ pd->rangeHighLoc = $3->token.loc;
+ };
+
+getkey_spec:
+ KW_GetKey inline_expr ';' final {
+ pd->getKeyExpr = $2->inlineList;
+ };
+
+access_spec:
+ KW_Access inline_expr ';' final {
+ pd->accessExpr = $2->inlineList;
+ };
+
+variable_spec:
+	KW_Variable opt_whitespace TK_Word inline_expr ';' final {
+		/* FIXME: Only "curstate" is handled; any other name is reported. */
+		if ( strcmp( $3->data, "curstate" ) == 0 )
+			pd->curStateExpr = $4->inlineList;
+		else {
+			error($3->loc) << "sorry, unimplemented" << endl;
+		}
+	};
+
+opt_whitespace: opt_whitespace IL_WhiteSpace;
+opt_whitespace: ;
+
+#
+# Expressions
+#
+
+nonterm join_or_lm
+{
+ JoinOrLm *joinOrLm;
+};
+
+join_or_lm:
+ join final {
+ $$->joinOrLm = new JoinOrLm( $1->join );
+ };
+join_or_lm:
+ TK_BarStar lm_part_list '*' '|' final {
+ /* Create a new factor going to a longest match structure. Record
+ * in the parse data that we have a longest match. */
+ LongestMatch *lm = new LongestMatch( $1->loc, $2->lmPartList );
+ pd->lmList.append( lm );
+ for ( LmPartList::Iter lmp = *($2->lmPartList); lmp.lte(); lmp++ )
+ lmp->longestMatch = lm;
+ $$->joinOrLm = new JoinOrLm( lm );
+ };
+
+nonterm lm_part_list
+{
+ LmPartList *lmPartList;
+};
+
+lm_part_list:
+ lm_part_list longest_match_part final {
+ if ( $2->lmPart != 0 )
+ $1->lmPartList->append( $2->lmPart );
+ $$->lmPartList = $1->lmPartList;
+ };
+lm_part_list:
+ longest_match_part final {
+ /* Create a new list with the part. */
+ $$->lmPartList = new LmPartList;
+ if ( $1->lmPart != 0 )
+ $$->lmPartList->append( $1->lmPart );
+ };
+
+nonterm longest_match_part
+{
+ LongestMatchPart *lmPart;
+};
+
+longest_match_part:
+ action_spec final { $$->lmPart = 0; };
+longest_match_part:
+ assignment final { $$->lmPart = 0; };
+longest_match_part:
+ join opt_lm_part_action ';' final {
+ $$->lmPart = 0;
+ Action *action = $2->action;
+ if ( action != 0 )
+ action->isLmAction = true;
+ $$->lmPart = new LongestMatchPart( $1->join, action,
+ $3->loc, pd->nextLongestMatchId++ );
+ };
+
+nonterm opt_lm_part_action
+{
+ Action *action;
+};
+
+opt_lm_part_action:
+ TK_DoubleArrow action_embed final {
+ $$->action = $2->action;
+ };
+opt_lm_part_action:
+ action_embed_block final {
+ $$->action = $1->action;
+ };
+opt_lm_part_action:
+ final {
+ $$->action = 0;
+ };
+
+
+nonterm join
+{
+ Join *join;
+};
+
+join:
+ join ',' expression final {
+ /* Append the expression to the list and return it. */
+ $1->join->exprList.append( $3->expression );
+ $$->join = $1->join;
+ };
+join:
+ expression final {
+ $$->join = new Join( $1->expression );
+ };
+
+nonterm expression
+{
+ Expression *expression;
+};
+
+expression:
+ expression '|' term final {
+ $$->expression = new Expression( $1->expression,
+ $3->term, Expression::OrType );
+ };
+expression:
+ expression '&' term final {
+ $$->expression = new Expression( $1->expression,
+ $3->term, Expression::IntersectType );
+ };
+expression:
+ expression pri(1) '-' term final {
+ $$->expression = new Expression( $1->expression,
+ $3->term, Expression::SubtractType );
+ };
+expression:
+ expression TK_DashDash term final {
+ $$->expression = new Expression( $1->expression,
+ $3->term, Expression::StrongSubtractType );
+ };
+expression:
+ term final {
+ $$->expression = new Expression( $1->term );
+ };
+
+nonterm term
+{
+ Term *term;
+};
+
+# Concatenation level. Factors may be juxtaposed directly or joined with '.',
+# TK_ColonGt, TK_ColonGtGt or TK_LtColon. The last three select the
+# RightStartType, RightFinishType and LeftType term kinds; the first two use
+# the two-argument Term constructor.
+term:
+ term factor_with_label final {
+ /* FIXME: Need to reject this if of the form (term . -num). */
+ $$->term = new Term( $1->term, $2->factorWithAug );
+ };
+term:
+ term '.' factor_with_label final {
+ $$->term = new Term( $1->term, $3->factorWithAug );
+ };
+term:
+ term TK_ColonGt factor_with_label final {
+ $$->term = new Term( $1->term, $3->factorWithAug, Term::RightStartType );
+ };
+term:
+ term TK_ColonGtGt factor_with_label final {
+ $$->term = new Term( $1->term, $3->factorWithAug, Term::RightFinishType );
+ };
+term:
+ term TK_LtColon factor_with_label final {
+ $$->term = new Term( $1->term,
+ $3->factorWithAug, Term::LeftType );
+ };
+term:
+ factor_with_label final {
+ $$->term = new Term( $1->factorWithAug );
+ };
+
+nonterm factor_with_label
+{
+ FactorWithAug *factorWithAug;
+};
+
+# Zero or more "word ':'" labels in front of a factor. The rule is right
+# recursive and each label is prepended to the factor's label list; the
+# FactorWithAug itself is passed up unchanged.
+factor_with_label:
+ TK_Word ':' factor_with_label final {
+ /* Add the label to the list and pass the factor up. */
+ $3->factorWithAug->labels.prepend( Label($1->loc, $1->data) );
+ $$->factorWithAug = $3->factorWithAug;
+ };
+factor_with_label:
+ factor_with_ep final {
+ $$->factorWithAug = $1->factorWithAug;
+ };
+
+nonterm factor_with_ep
+{
+ FactorWithAug *factorWithAug;
+};
+
+# Epsilon links: a factor followed by any number of "TK_Arrow state-ref"
+# targets. The target name is read from the parser's nameRef member, which the
+# local_state_ref productions fill in while the reference is parsed.
+factor_with_ep:
+ factor_with_ep TK_Arrow local_state_ref final {
+ /* Add the target to the list and return the factor object. The link is
+ * located at the arrow token. */
+ $1->factorWithAug->epsilonLinks.append( EpsilonLink( $2->loc, nameRef ) );
+ $$->factorWithAug = $1->factorWithAug;
+ };
+factor_with_ep:
+ factor_with_aug final {
+ $$->factorWithAug = $1->factorWithAug;
+ };
+
+nonterm factor_with_aug
+{
+ FactorWithAug *factorWithAug;
+};
+
+# Augmentations applied to a factor: embedded actions, priorities, conditions
+# and to-state, from-state, EOF and error actions. Every recursive production
+# appends to the existing FactorWithAug and passes the same object up; the
+# last production creates the FactorWithAug from a factor_with_rep.
+factor_with_aug:
+ factor_with_aug aug_type_base action_embed final {
+ /* Append the action to the factorWithAug, record the reference from
+ * factorWithAug to the action and pass up the factorWithAug. */
+ $1->factorWithAug->actions.append(
+ ParserAction( $2->loc, $2->augType, 0, $3->action ) );
+ $$->factorWithAug = $1->factorWithAug;
+ };
+factor_with_aug:
+ factor_with_aug aug_type_base priority_aug final {
+ /* No name given: append the priority under the current default
+ * priority key and pass the factorWithAug up. */
+ $1->factorWithAug->priorityAugs.append(
+ PriorityAug( $2->augType, pd->curDefPriorKey, $3->priorityNum ) );
+ $$->factorWithAug = $1->factorWithAug;
+ };
+factor_with_aug:
+ factor_with_aug aug_type_base '(' priority_name ',' priority_aug ')' final {
+ /* Append the priority under the explicitly named key. */
+ $1->factorWithAug->priorityAugs.append(
+ PriorityAug( $2->augType, $4->priorityName, $6->priorityNum ) );
+ $$->factorWithAug = $1->factorWithAug;
+ };
+factor_with_aug:
+ factor_with_aug aug_type_cond action_embed final {
+ /* Conditions are kept on their own list, separate from the actions. */
+ $1->factorWithAug->conditions.append( ParserAction( $2->loc,
+ $2->augType, 0, $3->action ) );
+ $$->factorWithAug = $1->factorWithAug;
+ };
+factor_with_aug:
+ factor_with_aug aug_type_to_state action_embed final {
+ /* Append the action, pass it up. */
+ $1->factorWithAug->actions.append( ParserAction( $2->loc,
+ $2->augType, 0, $3->action ) );
+ $$->factorWithAug = $1->factorWithAug;
+ };
+factor_with_aug:
+ factor_with_aug aug_type_from_state action_embed final {
+ /* Append the action, pass it up. */
+ $1->factorWithAug->actions.append( ParserAction( $2->loc,
+ $2->augType, 0, $3->action ) );
+ $$->factorWithAug = $1->factorWithAug;
+ };
+factor_with_aug:
+ factor_with_aug aug_type_eof action_embed final {
+ /* Append the action, pass it up. */
+ $1->factorWithAug->actions.append( ParserAction( $2->loc,
+ $2->augType, 0, $3->action ) );
+ $$->factorWithAug = $1->factorWithAug;
+ };
+factor_with_aug:
+ factor_with_aug aug_type_gbl_error action_embed final {
+ /* Append the action to the factorWithAug, record the reference from
+ * factorWithAug to the action and pass up the factorWithAug. Note that
+ * the global form is also filed under the default local error key. */
+ $1->factorWithAug->actions.append( ParserAction( $2->loc,
+ $2->augType, pd->curDefLocalErrKey, $3->action ) );
+ $$->factorWithAug = $1->factorWithAug;
+ };
+factor_with_aug:
+ factor_with_aug aug_type_local_error action_embed final {
+ /* Append the action to the factorWithAug, record the reference from
+ * factorWithAug to the action and pass up the factorWithAug. No error
+ * name was given, so the default local error key is used. */
+ $1->factorWithAug->actions.append( ParserAction( $2->loc,
+ $2->augType, pd->curDefLocalErrKey, $3->action ) );
+ $$->factorWithAug = $1->factorWithAug;
+ };
+factor_with_aug:
+ factor_with_aug aug_type_local_error '(' local_err_name ',' action_embed ')' final {
+ /* Append the action to the factorWithAug, record the reference from
+ * factorWithAug to the action and pass up the factorWithAug. The action
+ * is filed under the explicitly named local error key. */
+ $1->factorWithAug->actions.append( ParserAction( $2->loc,
+ $2->augType, $4->error_name, $6->action ) );
+ $$->factorWithAug = $1->factorWithAug;
+ };
+factor_with_aug:
+ factor_with_rep final {
+ $$->factorWithAug = new FactorWithAug( $1->factorWithRep );
+ };
+
+# Value type shared by all aug_type_* nonterminals: the location of the
+# operator token and the kind of augmentation it selects.
+type aug_type
+{
+ InputLoc loc;
+ AugType augType;
+};
+
+# Classes of transitions on which to embed actions or change priorities.
+nonterm aug_type_base uses aug_type;
+
+aug_type_base: '@' final { $$->loc = $1->loc; $$->augType = at_finish; };
+aug_type_base: '%' final { $$->loc = $1->loc; $$->augType = at_leave; };
+aug_type_base: '$' final { $$->loc = $1->loc; $$->augType = at_all; };
+aug_type_base: '>' final { $$->loc = $1->loc; $$->augType = at_start; };
+
+# Embedding conditions. Each class has a single-token form and an
+# "operator KW_When" form; a bare KW_When selects at_all. The location is
+# always that of the first token.
+nonterm aug_type_cond uses aug_type;
+
+aug_type_cond: TK_StartCond final { $$->loc = $1->loc; $$->augType = at_start; };
+aug_type_cond: '>' KW_When final { $$->loc = $1->loc; $$->augType = at_start; };
+aug_type_cond: TK_AllCond final { $$->loc = $1->loc; $$->augType = at_all; };
+aug_type_cond: '$' KW_When final { $$->loc = $1->loc; $$->augType = at_all; };
+aug_type_cond: TK_LeavingCond final { $$->loc = $1->loc; $$->augType = at_leave; };
+aug_type_cond: '%' KW_When final { $$->loc = $1->loc; $$->augType = at_leave; };
+aug_type_cond: KW_When final { $$->loc = $1->loc; $$->augType = at_all; };
+
+#
+# To state actions. Every state class is accepted either as a single token
+# or as a class operator followed by KW_To; both spellings yield the same
+# augType. The location is that of the first token.
+#
+
+nonterm aug_type_to_state uses aug_type;
+
+aug_type_to_state: TK_StartToState
+ final { $$->loc = $1->loc; $$->augType = at_start_to_state; };
+aug_type_to_state: '>' KW_To
+ final { $$->loc = $1->loc; $$->augType = at_start_to_state; };
+
+aug_type_to_state: TK_NotStartToState
+ final { $$->loc = $1->loc; $$->augType = at_not_start_to_state; };
+aug_type_to_state: '<' KW_To
+ final { $$->loc = $1->loc; $$->augType = at_not_start_to_state; };
+
+aug_type_to_state: TK_AllToState
+ final { $$->loc = $1->loc; $$->augType = at_all_to_state; };
+aug_type_to_state: '$' KW_To
+ final { $$->loc = $1->loc; $$->augType = at_all_to_state; };
+
+aug_type_to_state: TK_FinalToState
+ final { $$->loc = $1->loc; $$->augType = at_final_to_state; };
+aug_type_to_state: '%' KW_To
+ final { $$->loc = $1->loc; $$->augType = at_final_to_state; };
+
+aug_type_to_state: TK_NotFinalToState
+ final { $$->loc = $1->loc; $$->augType = at_not_final_to_state; };
+aug_type_to_state: '@' KW_To
+ final { $$->loc = $1->loc; $$->augType = at_not_final_to_state; };
+
+aug_type_to_state: TK_MiddleToState
+ final { $$->loc = $1->loc; $$->augType = at_middle_to_state; };
+aug_type_to_state: TK_Middle KW_To
+ final { $$->loc = $1->loc; $$->augType = at_middle_to_state; };
+
+#
+# From state actions. Every state class is accepted either as a single token
+# or as a class operator followed by KW_From; both spellings yield the same
+# augType. The location is that of the first token.
+#
+
+nonterm aug_type_from_state uses aug_type;
+
+aug_type_from_state: TK_StartFromState
+ final { $$->loc = $1->loc; $$->augType = at_start_from_state; };
+aug_type_from_state: '>' KW_From
+ final { $$->loc = $1->loc; $$->augType = at_start_from_state; };
+
+aug_type_from_state: TK_NotStartFromState
+ final { $$->loc = $1->loc; $$->augType = at_not_start_from_state; };
+aug_type_from_state: '<' KW_From
+ final { $$->loc = $1->loc; $$->augType = at_not_start_from_state; };
+
+aug_type_from_state: TK_AllFromState
+ final { $$->loc = $1->loc; $$->augType = at_all_from_state; };
+aug_type_from_state: '$' KW_From
+ final { $$->loc = $1->loc; $$->augType = at_all_from_state; };
+
+aug_type_from_state: TK_FinalFromState
+ final { $$->loc = $1->loc; $$->augType = at_final_from_state; };
+aug_type_from_state: '%' KW_From
+ final { $$->loc = $1->loc; $$->augType = at_final_from_state; };
+
+aug_type_from_state: TK_NotFinalFromState
+ final { $$->loc = $1->loc; $$->augType = at_not_final_from_state; };
+aug_type_from_state: '@' KW_From
+ final { $$->loc = $1->loc; $$->augType = at_not_final_from_state; };
+
+aug_type_from_state: TK_MiddleFromState
+ final { $$->loc = $1->loc; $$->augType = at_middle_from_state; };
+aug_type_from_state: TK_Middle KW_From
+ final { $$->loc = $1->loc; $$->augType = at_middle_from_state; };
+
+#
+# Eof state actions. Every state class is accepted either as a single token
+# or as a class operator followed by KW_Eof; both spellings yield the same
+# augType. The location is that of the first token.
+#
+
+nonterm aug_type_eof uses aug_type;
+
+aug_type_eof: TK_StartEOF
+ final { $$->loc = $1->loc; $$->augType = at_start_eof; };
+aug_type_eof: '>' KW_Eof
+ final { $$->loc = $1->loc; $$->augType = at_start_eof; };
+
+aug_type_eof: TK_NotStartEOF
+ final { $$->loc = $1->loc; $$->augType = at_not_start_eof; };
+aug_type_eof: '<' KW_Eof
+ final { $$->loc = $1->loc; $$->augType = at_not_start_eof; };
+
+aug_type_eof: TK_AllEOF
+ final { $$->loc = $1->loc; $$->augType = at_all_eof; };
+aug_type_eof: '$' KW_Eof
+ final { $$->loc = $1->loc; $$->augType = at_all_eof; };
+
+aug_type_eof: TK_FinalEOF
+ final { $$->loc = $1->loc; $$->augType = at_final_eof; };
+aug_type_eof: '%' KW_Eof
+ final { $$->loc = $1->loc; $$->augType = at_final_eof; };
+
+aug_type_eof: TK_NotFinalEOF
+ final { $$->loc = $1->loc; $$->augType = at_not_final_eof; };
+aug_type_eof: '@' KW_Eof
+ final { $$->loc = $1->loc; $$->augType = at_not_final_eof; };
+
+aug_type_eof: TK_MiddleEOF
+ final { $$->loc = $1->loc; $$->augType = at_middle_eof; };
+aug_type_eof: TK_Middle KW_Eof
+ final { $$->loc = $1->loc; $$->augType = at_middle_eof; };
+
+#
+# Global error actions. Every state class is accepted either as a single
+# token or as a class operator followed by KW_Err; both spellings yield the
+# same augType. The location is that of the first token.
+#
+
+nonterm aug_type_gbl_error uses aug_type;
+
+aug_type_gbl_error: TK_StartGblError
+ final { $$->loc = $1->loc; $$->augType = at_start_gbl_error; };
+aug_type_gbl_error: '>' KW_Err
+ final { $$->loc = $1->loc; $$->augType = at_start_gbl_error; };
+
+aug_type_gbl_error: TK_NotStartGblError
+ final { $$->loc = $1->loc; $$->augType = at_not_start_gbl_error; };
+aug_type_gbl_error: '<' KW_Err
+ final { $$->loc = $1->loc; $$->augType = at_not_start_gbl_error; };
+
+aug_type_gbl_error: TK_AllGblError
+ final { $$->loc = $1->loc; $$->augType = at_all_gbl_error; };
+aug_type_gbl_error: '$' KW_Err
+ final { $$->loc = $1->loc; $$->augType = at_all_gbl_error; };
+
+aug_type_gbl_error: TK_FinalGblError
+ final { $$->loc = $1->loc; $$->augType = at_final_gbl_error; };
+aug_type_gbl_error: '%' KW_Err
+ final { $$->loc = $1->loc; $$->augType = at_final_gbl_error; };
+
+aug_type_gbl_error: TK_NotFinalGblError
+ final { $$->loc = $1->loc; $$->augType = at_not_final_gbl_error; };
+aug_type_gbl_error: '@' KW_Err
+ final { $$->loc = $1->loc; $$->augType = at_not_final_gbl_error; };
+
+aug_type_gbl_error: TK_MiddleGblError
+ final { $$->loc = $1->loc; $$->augType = at_middle_gbl_error; };
+aug_type_gbl_error: TK_Middle KW_Err
+ final { $$->loc = $1->loc; $$->augType = at_middle_gbl_error; };
+
+
+#
+# Local error actions. Every state class is accepted either as a single
+# token or as a class operator followed by KW_Lerr; both spellings yield the
+# same augType. The location is that of the first token.
+#
+
+nonterm aug_type_local_error uses aug_type;
+
+aug_type_local_error: TK_StartLocalError
+ final { $$->loc = $1->loc; $$->augType = at_start_local_error; };
+aug_type_local_error: '>' KW_Lerr
+ final { $$->loc = $1->loc; $$->augType = at_start_local_error; };
+
+aug_type_local_error: TK_NotStartLocalError
+ final { $$->loc = $1->loc; $$->augType = at_not_start_local_error; };
+aug_type_local_error: '<' KW_Lerr
+ final { $$->loc = $1->loc; $$->augType = at_not_start_local_error; };
+
+aug_type_local_error: TK_AllLocalError
+ final { $$->loc = $1->loc; $$->augType = at_all_local_error; };
+aug_type_local_error: '$' KW_Lerr
+ final { $$->loc = $1->loc; $$->augType = at_all_local_error; };
+
+aug_type_local_error: TK_FinalLocalError
+ final { $$->loc = $1->loc; $$->augType = at_final_local_error; };
+aug_type_local_error: '%' KW_Lerr
+ final { $$->loc = $1->loc; $$->augType = at_final_local_error; };
+
+aug_type_local_error: TK_NotFinalLocalError
+ final { $$->loc = $1->loc; $$->augType = at_not_final_local_error; };
+aug_type_local_error: '@' KW_Lerr
+ final { $$->loc = $1->loc; $$->augType = at_not_final_local_error; };
+
+aug_type_local_error: TK_MiddleLocalError
+ final { $$->loc = $1->loc; $$->augType = at_middle_local_error; };
+aug_type_local_error: TK_Middle KW_Lerr
+ final { $$->loc = $1->loc; $$->augType = at_middle_local_error; };
+
+
+# Value type shared by the action_embed* nonterminals. The pointer may be
+# null: action_embed_word passes up 0 when the name lookup fails.
+type action_ref
+{
+ Action *action;
+};
+
+# Different ways to embed actions. A TK_Word is reference to an action given by
+# the user as a statement in the fsm specification. An action can also be
+# specified immediately.
+nonterm action_embed uses action_ref;
+
+action_embed: action_embed_word final { $$->action = $1->action; };
+action_embed: action_embed_block final { $$->action = $1->action; };
+
+nonterm action_embed_word uses action_ref;
+
+action_embed_word:
+ TK_Word final {
+ /* Set the name in the actionDict. */
+ Action *action = pd->actionDict.find( $1->data );
+ if ( action != 0 ) {
+ /* Pass up the action element */
+ $$->action = action;
+ }
+ else {
+ /* Will recover by returning null as the action. */
+ error($1->loc) << "action lookup of \"" << $1->data << "\" failed" << endl;
+ $$->action = 0;
+ }
+ };
+
+nonterm action_embed_block uses action_ref;
+
+# An action given in place as a '{' ... '}' block of inline code. The new
+# Action is located at the opening brace, is created with 0 in the name
+# position, and is registered on the parse data's action list.
+action_embed_block:
+ '{' inline_block '}' final {
+ /* Create the action, add it to the list and pass up. */
+ Action *newAction = new Action( $1->loc, 0, $2->inlineList );
+ pd->actionList.append( newAction );
+ $$->action = newAction;
+ };
+
+nonterm priority_name
+{
+    int priorityName;
+};
+
+# A specified priority name. Looks up the name in the current priority
+# dictionary, creating an entry under the next free key when it is new.
+priority_name:
+    TK_Word final {
+        PriorDictEl *keyEl;
+        const bool isNew = pd->priorDict.insert( $1->data,
+                pd->nextPriorKey, &keyEl ) != 0;
+
+        /* Only a fresh insertion consumes a key. */
+        if ( isNew )
+            pd->nextPriorKey += 1;
+
+        /* Use the inserted/found priority key. */
+        $$->priorityName = keyEl->value;
+    };
+
+nonterm priority_aug
+{
+    int priorityNum;
+};
+
+# Priority change specs. Converts the signed decimal text produced by
+# priority_aug_num into an int. A value that does not fit is reported and
+# replaced by priority 0 so that parsing can continue.
+priority_aug:
+    priority_aug_num final {
+        /* Keep the result in a long: strtol signals a range error by
+         * returning LONG_MAX or LONG_MIN, and those sentinels do not survive
+         * being narrowed to int where long is wider than int. The result
+         * ends up in an int, so values outside the int range are rejected
+         * as well rather than silently truncated. */
+        errno = 0;
+        long aug = strtol( $1->token.data, 0, 10 );
+        if ( ( errno == ERANGE && aug == LONG_MAX ) || aug > INT_MAX ) {
+            /* Priority number too large. Recover by setting the priority to 0. */
+            error($1->token.loc) << "priority number " << $1->token.data <<
+                    " overflows" << endl;
+            $$->priorityNum = 0;
+        }
+        else if ( ( errno == ERANGE && aug == LONG_MIN ) || aug < INT_MIN ) {
+            /* Priority number too large in the neg. Recover by using 0. */
+            error($1->token.loc) << "priority number " << $1->token.data <<
+                    " underflows" << endl;
+            $$->priorityNum = 0;
+        }
+        else {
+            /* Fits in an int. */
+            $$->priorityNum = (int)aug;
+        }
+    };
+
+nonterm priority_aug_num uses token_type;
+
+# The text of a priority number: an unsigned integer with an optional '+' or
+# '-' in front. For the signed forms a new token is assembled from the sign
+# character followed by the digits, located at the sign.
+priority_aug_num:
+ TK_UInt final {
+ $$->token = *$1;
+ };
+priority_aug_num:
+ '+' TK_UInt final {
+ $$->token.set( "+", 1 );
+ $$->token.loc = $1->loc;
+ $$->token.append( *$2 );
+ };
+priority_aug_num:
+ '-' TK_UInt final {
+ $$->token.set( "-", 1 );
+ $$->token.loc = $1->loc;
+ $$->token.append( *$2 );
+ };
+
+nonterm local_err_name
+{
+ int error_name;
+};
+
+local_err_name:
+ TK_Word final {
+ /* Lookup/create the priority key. */
+ LocalErrDictEl *localErrDictEl;
+ if ( pd->localErrDict.insert( $1->data, pd->nextLocalErrKey, &localErrDictEl ) )
+ pd->nextLocalErrKey += 1;
+
+ /* Use the inserted/found priority key. */
+ $$->error_name = localErrDictEl->value;
+ };
+
+
+
+# The fourth level of precedence. These are the trailing unary operators that
+# allow for repetition. Each operator wraps the operand in a new FactorWithRep
+# located at the operator token ($2). The two integer arguments carry the
+# lower and upper repetition counts; the side a form does not use is 0.
+
+nonterm factor_with_rep
+{
+ FactorWithRep *factorWithRep;
+};
+
+factor_with_rep:
+ factor_with_rep '*' final {
+ $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
+ 0, 0, FactorWithRep::StarType );
+ };
+factor_with_rep:
+ factor_with_rep TK_StarStar final {
+ $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
+ 0, 0, FactorWithRep::StarStarType );
+ };
+factor_with_rep:
+ factor_with_rep '?' final {
+ $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
+ 0, 0, FactorWithRep::OptionalType );
+ };
+factor_with_rep:
+ factor_with_rep '+' final {
+ $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
+ 0, 0, FactorWithRep::PlusType );
+ };
+factor_with_rep:
+ factor_with_rep '{' factor_rep_num '}' final {
+ /* {n}: exactly n. */
+ $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
+ $3->rep, 0, FactorWithRep::ExactType );
+ };
+factor_with_rep:
+ factor_with_rep '{' ',' factor_rep_num '}' final {
+ /* {,n}: upper bound only. */
+ $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
+ 0, $4->rep, FactorWithRep::MaxType );
+ };
+factor_with_rep:
+ factor_with_rep '{' factor_rep_num ',' '}' final {
+ /* {n,}: lower bound only. */
+ $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
+ $3->rep, 0, FactorWithRep::MinType );
+ };
+factor_with_rep:
+ factor_with_rep '{' factor_rep_num ',' factor_rep_num '}' final {
+ /* {n,m}: both bounds. */
+ $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
+ $3->rep, $5->rep, FactorWithRep::RangeType );
+ };
+factor_with_rep:
+ factor_with_neg final {
+ /* No repetition operator: wrap the operand, reusing its location. */
+ $$->factorWithRep = new FactorWithRep(
+ $1->factorWithNeg->loc, $1->factorWithNeg );
+ };
+
+nonterm factor_rep_num
+{
+ int rep;
+};
+
+factor_rep_num:
+ TK_UInt final {
+ // Convert the priority number to a long. Check for overflow.
+ errno = 0;
+ int rep = strtol( $1->data, 0, 10 );
+ if ( errno == ERANGE && rep == LONG_MAX ) {
+ // Repetition too large. Recover by returing repetition 1. */
+ error($1->loc) << "repetition number " << $1->data << " overflows" << endl;
+ $$->rep = 1;
+ }
+ else {
+ // Cannot be negative, so no overflow.
+ $$->rep = rep;
+ }
+ };
+
+
+#
+# The fifth level up in precedence. Negation.
+#
+
+nonterm factor_with_neg
+{
+    FactorWithNeg *factorWithNeg;
+};
+
+# '!' and '^' wrap the operand in a FactorWithNeg of NegateType and
+# CharNegateType respectively, located at the operator token. A plain factor
+# is wrapped without a negation type.
+factor_with_neg:
+    '!' factor_with_neg final {
+        $$->factorWithNeg = new FactorWithNeg( $1->loc,
+                $2->factorWithNeg, FactorWithNeg::NegateType );
+    };
+factor_with_neg:
+    '^' factor_with_neg final {
+        $$->factorWithNeg = new FactorWithNeg( $1->loc,
+                $2->factorWithNeg, FactorWithNeg::CharNegateType );
+    };
+factor_with_neg:
+    factor final {
+        /* The factor rule recovers from a failed graph lookup by passing up
+         * a null factor, so the location can only be read from the factor
+         * when there is one. Otherwise a value-initialized location is used;
+         * the error has already been reported by the factor rule.
+         * NOTE(review): assumes later passes tolerate a FactorWithNeg that
+         * holds a null factor once errors have been counted -- confirm. */
+        InputLoc factorLoc = InputLoc();
+        if ( $1->factor != 0 )
+            factorLoc = $1->factor->loc;
+        $$->factorWithNeg = new FactorWithNeg( factorLoc, $1->factor );
+    };
+
+# The basic operands of a machine expression: literals, numbers, references
+# to named graphs, or-blocks, regular expressions, ranges and parenthesized
+# joins. The factor pointer is null after a failed or disallowed graph
+# reference.
+nonterm factor
+{
+ Factor *factor;
+};
+
+factor:
+ TK_Literal final {
+ /* Create a new factor node going to a concat literal. */
+ $$->factor = new Factor( new Literal( *$1, Literal::LitString ) );
+ };
+factor:
+ alphabet_num final {
+ /* Create a new factor node going to a literal number. */
+ $$->factor = new Factor( new Literal( $1->token, Literal::Number ) );
+ };
+factor:
+ TK_Word final {
+ /* Find the named graph. */
+ GraphDictEl *gdNode = pd->graphDict.find( $1->data );
+ if ( gdNode == 0 ) {
+ /* Recover by returning null as the factor node. */
+ error($1->loc) << "graph lookup of \"" << $1->data << "\" failed" << endl;
+ $$->factor = 0;
+ }
+ else if ( gdNode->isInstance ) {
+ /* Recover by returning null as the factor node. */
+ error($1->loc) << "references to graph instantiations not allowed "
+ "in expressions" << endl;
+ $$->factor = 0;
+ }
+ else {
+ /* Create a factor node that is a lookup of an expression. */
+ $$->factor = new Factor( $1->loc, gdNode->value );
+ }
+ };
+factor:
+ RE_SqOpen regular_expr_or_data RE_SqClose final {
+ /* Create a new factor node going to an OR expression. */
+ $$->factor = new Factor( new ReItem( $1->loc, $2->reOrBlock, ReItem::OrBlock ) );
+ };
+factor:
+ RE_SqOpenNeg regular_expr_or_data RE_SqClose final {
+ /* Create a new factor node going to a negated OR expression. */
+ $$->factor = new Factor( new ReItem( $1->loc, $2->reOrBlock, ReItem::NegOrBlock ) );
+ };
+factor:
+ RE_Slash regular_expr RE_Slash final {
+ /* When the closing slash token is longer than one character its text
+ * is scanned up to the terminating NUL; an 'i' anywhere in it turns on
+ * case-insensitive matching for the expression. */
+ if ( $3->length > 1 ) {
+ for ( char *p = $3->data; *p != 0; p++ ) {
+ if ( *p == 'i' )
+ $2->regExpr->caseInsensitive = true;
+ }
+ }
+
+ /* Create a new factor node going to a regular exp. */
+ $$->factor = new Factor( $2->regExpr );
+ };
+factor:
+ range_lit TK_DotDot range_lit final {
+ /* Create a new factor node going to a range. */
+ $$->factor = new Factor( new Range( $1->literal, $3->literal ) );
+ };
+factor:
+ '(' join ')' final {
+ /* Create a new factor going to a parenthesized join. */
+ $$->factor = new Factor( $2->join );
+ };
+
+nonterm range_lit
+{
+ Literal *literal;
+};
+
+# Literals which can be the end points of ranges.
+range_lit:
+ TK_Literal final {
+ /* Range literals must have only one char. We restrict this in the parse tree. */
+ $$->literal = new Literal( *$1, Literal::LitString );
+ };
+range_lit:
+ alphabet_num final {
+ /* Create a new literal number. */
+ $$->literal = new Literal( $1->token, Literal::Number );
+ };
+
+nonterm alphabet_num uses token_type;
+
+# Any form of a number that can be used as a basic machine: an unsigned
+# integer, a negated unsigned integer or a hex number. For the negated form
+# a new token is assembled from "-" followed by the digits, located at the
+# sign.
+alphabet_num:
+ TK_UInt final {
+ $$->token = *$1;
+ };
+alphabet_num:
+ '-' TK_UInt final {
+ $$->token.set( "-", 1 );
+ $$->token.loc = $1->loc;
+ $$->token.append( *$2 );
+ };
+alphabet_num:
+ TK_Hex final {
+ $$->token = *$1;
+ };
+#
+# Regular Expressions.
+#
+
+nonterm regular_expr
+{
+    RegExpr *regExpr;
+};
+
+# Parser for regular expression fsms. Any number of expression items, each of
+# which generally gives a machine one character long or one character long
+# starred.
+regular_expr:
+    regular_expr regular_expr_item final {
+        /* An optimization to lessen the tree size. Two neighbouring plain
+         * (non-starred) data items can share one node: this is the case when
+         * the new item is non-starred data and the expression on the left
+         * ends in a non-starred data item as well. */
+        const bool canPaste =
+                $2->reItem->type == ReItem::Data && !$2->reItem->star &&
+                $1->regExpr->type == RegExpr::RecurseItem &&
+                $1->regExpr->item->type == ReItem::Data &&
+                !$1->regExpr->item->star;
+
+        if ( !canPaste ) {
+            /* Put the left side and the new item under a new reg exp node. */
+            $$->regExpr = new RegExpr( $1->regExpr, $2->reItem );
+        }
+        else {
+            /* Append the new item's text to the last item on the left, toss
+             * the new item and return the left side. */
+            $1->regExpr->item->token.append( $2->reItem->token );
+            delete $2->reItem;
+            $$->regExpr = $1->regExpr;
+        }
+    };
+regular_expr:
+    final {
+        /* The empty expression: nothing to optimize. */
+        $$->regExpr = new RegExpr();
+    };
+
+nonterm regular_expr_item
+{
+ ReItem *reItem;
+};
+
+# RegularExprItems can be a character spec with an optional staring of the char.
+# A trailing RE_Star sets the star flag on the item produced by
+# regular_expr_char; the same ReItem object is passed up in both forms.
+regular_expr_item:
+ regular_expr_char RE_Star final {
+ $1->reItem->star = true;
+ $$->reItem = $1->reItem;
+ };
+regular_expr_item:
+ regular_expr_char final {
+ $$->reItem = $1->reItem;
+ };
+
+nonterm regular_expr_char
+{
+ ReItem *reItem;
+};
+
+# A character spec can be a set of characters inside of square parenthesis, a
+# dot specifying any character or some explicitly stated character. Every form
+# creates a new ReItem located at its first token.
+regular_expr_char:
+ RE_SqOpen regular_expr_or_data RE_SqClose final {
+ $$->reItem = new ReItem( $1->loc, $2->reOrBlock, ReItem::OrBlock );
+ };
+regular_expr_char:
+ RE_SqOpenNeg regular_expr_or_data RE_SqClose final {
+ $$->reItem = new ReItem( $1->loc, $2->reOrBlock, ReItem::NegOrBlock );
+ };
+regular_expr_char:
+ RE_Dot final {
+ $$->reItem = new ReItem( $1->loc, ReItem::Dot );
+ };
+regular_expr_char:
+ RE_Char final {
+ $$->reItem = new ReItem( $1->loc, *$1 );
+ };
+
+# The data inside of a [] expression in a regular expression. Accepts any
+# number of characters or ranges.
+nonterm regular_expr_or_data
+{
+    ReOrBlock *reOrBlock;
+};
+
+regular_expr_or_data:
+    regular_expr_or_data regular_expr_or_char final {
+        /* An optimization to lessen the tree size. Two neighbouring plain
+         * characters can share one node: this is the case when the new item
+         * is data and the block on the left ends in a data item as well. */
+        const bool canPaste =
+                $2->reOrItem->type == ReOrItem::Data &&
+                $1->reOrBlock->type == ReOrBlock::RecurseItem &&
+                $1->reOrBlock->item->type == ReOrItem::Data;
+
+        if ( !canPaste ) {
+            /* Can't optimize, put the left and right under a new node. */
+            $$->reOrBlock = new ReOrBlock( $1->reOrBlock, $2->reOrItem );
+        }
+        else {
+            /* Append the new item's text to the last item on the left, toss
+             * the new item and return the left side. */
+            $1->reOrBlock->item->token.append( $2->reOrItem->token );
+            delete $2->reOrItem;
+            $$->reOrBlock = $1->reOrBlock;
+        }
+    };
+regular_expr_or_data:
+    final {
+        /* The empty block. */
+        $$->reOrBlock = new ReOrBlock();
+    };
+
+# A single character inside of an or expression. Can either be a character or a
+# set of characters.
+nonterm regular_expr_or_char
+{
+ ReOrItem *reOrItem;
+};
+
+regular_expr_or_char:
+ RE_Char final {
+ $$->reOrItem = new ReOrItem( $1->loc, *$1 );
+ };
+regular_expr_or_char:
+ RE_Char RE_Dash RE_Char final {
+ /* A range: located at the dash, bounded by the first character of
+ * each end token. */
+ $$->reOrItem = new ReOrItem( $2->loc, $1->data[0], $3->data[0] );
+ };
+
+#
+# Inline Lists for inline host code.
+#
+
+# Value type shared by inline_block and inline_expr.
+type inline_list
+{
+ InlineList *inlineList;
+};
+
+# The body of an action block: a possibly empty sequence of items. The empty
+# production allocates the list; every further item is appended to that same
+# list object.
+nonterm inline_block uses inline_list;
+
+inline_block:
+ inline_block inline_block_item
+ final {
+ /* Append the item to the list, return the list. */
+ $$->inlineList = $1->inlineList;
+ $$->inlineList->append( $2->inlineItem );
+ };
+
+inline_block:
+ final {
+ /* Start with empty list. */
+ $$->inlineList = new InlineList;
+ };
+
+# Value type shared by the nonterminals that produce a single inline item.
+type inline_item
+{
+ InlineItem *inlineItem;
+};
+
+nonterm inline_block_item uses inline_item;
+nonterm inline_block_interpret uses inline_item;
+
+# One item of an action block. Plain tokens and symbols become Text items
+# carrying the token's location and data; interpreted statements arrive as
+# ready-made items and are passed through.
+inline_block_item:
+ inline_expr_any
+ final {
+ $$->inlineItem = new InlineItem( $1->token.loc, $1->token.data, InlineItem::Text );
+ };
+
+inline_block_item:
+ inline_block_symbol
+ final {
+ $$->inlineItem = new InlineItem( $1->token.loc, $1->token.data, InlineItem::Text );
+ };
+
+inline_block_item:
+ inline_block_interpret
+ final {
+ /* Pass the inline item up. */
+ $$->inlineItem = $1->inlineItem;
+ };
+
+# Punctuation that is passed through as text at block level. Unlike
+# inline_expr_symbol this set includes ';'.
+nonterm inline_block_symbol uses token_type;
+
+inline_block_symbol: ',' final { $$->token = *$1; };
+inline_block_symbol: ';' final { $$->token = *$1; };
+inline_block_symbol: '(' final { $$->token = *$1; };
+inline_block_symbol: ')' final { $$->token = *$1; };
+inline_block_symbol: '*' final { $$->token = *$1; };
+inline_block_symbol: TK_NameSep final { $$->token = *$1; };
+
+# Interpreted statements in a struct block. Each keyword statement becomes an
+# InlineItem located at the keyword. The state-reference forms copy the
+# parser's nameRef member, which the state_ref productions fill in; the '*'
+# forms attach the parsed inline expression as the item's children.
+inline_block_interpret:
+ inline_expr_interpret final {
+ /* Pass up interpreted items of inline expressions. */
+ $$->inlineItem = $1->inlineItem;
+ };
+inline_block_interpret:
+ KW_Hold ';' final {
+ $$->inlineItem = new InlineItem( $1->loc, InlineItem::Hold );
+ };
+inline_block_interpret:
+ KW_Exec inline_expr ';' final {
+ $$->inlineItem = new InlineItem( $1->loc, InlineItem::Exec );
+ $$->inlineItem->children = $2->inlineList;
+ };
+inline_block_interpret:
+ KW_Goto state_ref ';' final {
+ $$->inlineItem = new InlineItem( $1->loc,
+ new NameRef(nameRef), InlineItem::Goto );
+ };
+inline_block_interpret:
+ KW_Goto '*' inline_expr ';' final {
+ $$->inlineItem = new InlineItem( $1->loc, InlineItem::GotoExpr );
+ $$->inlineItem->children = $3->inlineList;
+ };
+inline_block_interpret:
+ KW_Next state_ref ';' final {
+ $$->inlineItem = new InlineItem( $1->loc, new NameRef(nameRef), InlineItem::Next );
+ };
+inline_block_interpret:
+ KW_Next '*' inline_expr ';' final {
+ $$->inlineItem = new InlineItem( $1->loc, InlineItem::NextExpr );
+ $$->inlineItem->children = $3->inlineList;
+ };
+inline_block_interpret:
+ KW_Call state_ref ';' final {
+ $$->inlineItem = new InlineItem( $1->loc, new NameRef(nameRef), InlineItem::Call );
+ };
+inline_block_interpret:
+ KW_Call '*' inline_expr ';' final {
+ $$->inlineItem = new InlineItem( $1->loc, InlineItem::CallExpr );
+ $$->inlineItem->children = $3->inlineList;
+ };
+inline_block_interpret:
+ KW_Ret ';' final {
+ $$->inlineItem = new InlineItem( $1->loc, InlineItem::Ret );
+ };
+inline_block_interpret:
+ KW_Break ';' final {
+ $$->inlineItem = new InlineItem( $1->loc, InlineItem::Break );
+ };
+
+# An inline expression: a possibly empty sequence of expression items. The
+# empty production allocates the list; every further item is appended to that
+# same list object.
+nonterm inline_expr uses inline_list;
+
+inline_expr:
+ inline_expr inline_expr_item
+ final {
+ $$->inlineList = $1->inlineList;
+ $$->inlineList->append( $2->inlineItem );
+ };
+inline_expr:
+ final {
+ /* Init the list used for this expr. */
+ $$->inlineList = new InlineList;
+ };
+
+# One item of an inline expression. Plain tokens and symbols become Text
+# items; interpreted keywords arrive as ready-made items and are passed
+# through.
+nonterm inline_expr_item uses inline_item;
+
+inline_expr_item:
+ inline_expr_any
+ final {
+ /* Return a text segment. */
+ $$->inlineItem = new InlineItem( $1->token.loc, $1->token.data, InlineItem::Text );
+ };
+inline_expr_item:
+ inline_expr_symbol
+ final {
+ /* Return a text segment, must heap alloc the text. */
+ $$->inlineItem = new InlineItem( $1->token.loc, $1->token.data, InlineItem::Text );
+ };
+inline_expr_item:
+ inline_expr_interpret
+ final{
+ /* Pass the inline item up. */
+ $$->inlineItem = $1->inlineItem;
+ };
+
+# Tokens of inline host code that are copied through verbatim: whitespace,
+# comments, literals, symbols, numbers and words.
+# NOTE(review): these actions are marked 'try' where most other productions
+# in this grammar use 'final' -- presumably a different kelbt action class;
+# confirm against the kelbt documentation.
+nonterm inline_expr_any uses token_type;
+
+inline_expr_any: IL_WhiteSpace try { $$->token = *$1; };
+inline_expr_any: IL_Comment try { $$->token = *$1; };
+inline_expr_any: IL_Literal try { $$->token = *$1; };
+inline_expr_any: IL_Symbol try { $$->token = *$1; };
+inline_expr_any: TK_UInt try { $$->token = *$1; };
+inline_expr_any: TK_Hex try { $$->token = *$1; };
+inline_expr_any: TK_Word try { $$->token = *$1; };
+
+# Anything in a ExecValExpr that is not dynamically allocated. This includes
+# all special symbols caught in inline code except the semi. Each production
+# copies the symbol token through unchanged.
+
+nonterm inline_expr_symbol uses token_type;
+
+inline_expr_symbol: ',' try { $$->token = *$1; };
+inline_expr_symbol: '(' try { $$->token = *$1; };
+inline_expr_symbol: ')' try { $$->token = *$1; };
+inline_expr_symbol: '*' try { $$->token = *$1; };
+inline_expr_symbol: TK_NameSep try { $$->token = *$1; };
+
+# Keywords that are interpreted inside inline expressions. Each becomes an
+# InlineItem of the matching type located at the keyword. KW_Entry also
+# carries a copy of the state reference collected in the parser's nameRef
+# member.
+nonterm inline_expr_interpret uses inline_item;
+
+inline_expr_interpret:
+ KW_PChar
+ final {
+ $$->inlineItem = new InlineItem( $1->loc, InlineItem::PChar );
+ };
+inline_expr_interpret:
+ KW_Char
+ final {
+ $$->inlineItem = new InlineItem( $1->loc, InlineItem::Char );
+ };
+inline_expr_interpret:
+ KW_CurState
+ final {
+ $$->inlineItem = new InlineItem( $1->loc, InlineItem::Curs );
+ };
+inline_expr_interpret:
+ KW_TargState
+ final {
+ $$->inlineItem = new InlineItem( $1->loc, InlineItem::Targs );
+ };
+inline_expr_interpret:
+ KW_Entry '(' state_ref ')'
+ final {
+ $$->inlineItem = new InlineItem( $1->loc,
+ new NameRef(nameRef), InlineItem::Entry );
+ };
+
+# State references. These rules carry no value of their own: they build the
+# reference in the parser's nameRef member, which the rules that use a state
+# reference read or copy in their own actions. The leading empty or
+# name-separator production resets nameRef before the names are appended.
+
+# A local state reference. Cannot have :: prefix.
+local_state_ref:
+ no_name_sep state_ref_names;
+
+# Clear the name ref structure.
+no_name_sep:
+ final {
+ nameRef.empty();
+ };
+
+# A qualified state reference.
+state_ref: opt_name_sep state_ref_names;
+
+# Optional leading name separator.
+opt_name_sep:
+ TK_NameSep
+ final {
+ /* Insert an initial null pointer val to indicate the existence of the
+ * initial name separator. */
+ nameRef.setAs( 0 );
+ };
+opt_name_sep:
+ final {
+ nameRef.empty();
+ };
+
+# List of names separated by ::
+state_ref_names:
+ state_ref_names TK_NameSep TK_Word
+ final {
+ nameRef.append( $3->data );
+ };
+state_ref_names:
+ TK_Word
+ final {
+ nameRef.append( $1->data );
+ };
+
+}%%
+
+/* Try to enter a machine definition into the graph dictionary. Shared by
+ * assignment and instantiation.
+ *
+ * loc        - location recorded on the dictionary entry and used for the
+ *              duplicate-name error.
+ * name       - name of the machine being defined.
+ * joinOrLm   - parse tree of the definition; wrapped in a new VarDef.
+ * isInstance - when true the entry is also put on the instance list.
+ *
+ * A name that is already defined is reported and the new definition is
+ * ignored. */
+void Parser::tryMachineDef( InputLoc &loc, char *name,
+        JoinOrLm *joinOrLm, bool isInstance )
+{
+    GraphDictEl *entry = pd->graphDict.insert( name );
+    if ( entry == 0 ) {
+        // Recover by ignoring the duplicate.
+        error(loc) << "fsm \"" << name << "\" previously defined" << endl;
+        return;
+    }
+
+    /* New element in the dict, all good. */
+    entry->value = new VarDef( name, joinOrLm );
+    entry->isInstance = isInstance;
+    entry->loc = loc;
+
+    /* If it is an instance, put it on the instance list. */
+    if ( isInstance )
+        pd->instanceList.append( entry );
+}
+
+ostream &error( const InputLoc &loc )
+{
+ gblErrorCount += 1;
+ assert( loc.fileName != 0 );
+ cerr << loc.fileName << ":" << loc.line << ": ";
+ return cerr;
+}
+
+ostream &Parser::parser_error( int tokId, Token &token )
+{
+ gblErrorCount += 1;
+
+ cerr << token.loc.fileName << ":" << token.loc.line << ": ";
+ cerr << "at token ";
+ if ( tokId < 128 )
+ cerr << "\"" << lelNames[tokId] << "\"";
+ else
+ cerr << lelNames[tokId];
+ if ( token.data != 0 )
+ cerr << " with data \"" << token.data << "\"";
+ cerr << ": ";
+
+ return cerr;
+}
+
+/* Feed one scanner token to the parser.
+ *
+ * loc      - source location of the token.
+ * tokId    - token id passed to parseLangEl.
+ * tokstart - token text; the pointer is stored, not copied.
+ * toklen   - length of the token text.
+ *
+ * Returns the non-negative result of parseLangEl. A negative result is
+ * reported as a parse error and the process exits with status 1. */
+int Parser::token( InputLoc &loc, int tokId, char *tokstart, int toklen )
+{
+    Token tok;
+    tok.data = tokstart;
+    tok.length = toklen;
+    tok.loc = loc;
+
+    const int res = parseLangEl( tokId, tok );
+    if ( res < 0 ) {
+        parser_error( tokId, tok ) << "parse error" << endl;
+        exit(1);
+    }
+    return res;
+}
diff --git a/ragel/rlparse.y b/ragel/rlparse.y
new file mode 100644
index 0000000..b0fc3df
--- /dev/null
+++ b/ragel/rlparse.y
@@ -0,0 +1,1456 @@
+/*
+ * Copyright 2001-2005 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+%{
+
+#include <iostream>
+#include <stdlib.h>
+#include <limits.h>
+#include <errno.h>
+#include "ragel.h"
+#include "parsetree.h"
+#include "rlparse.h"
+
+using std::cerr;
+using std::endl;
+
+InputData *id = 0;
+int includeDepth = 0;
+
+extern bool inlineWhitespace;
+
+/* These come from the scanner and point back into the parser. We will borrow
+ * them for error reporting. */
+extern YYSTYPE *yylval;
+extern YYLTYPE *yylloc;
+
+/* The include stack pointer from the scanner. Used to determine if we are
+ * currently processing an included file. */
+extern int inc_stack_ptr;
+
+/* Try to do a definition, common to assignment and instantiation. */
+void tryMachineDef( const YYLTYPE &loc, char *name,
+ JoinOrLm *joinOrLm, bool isInstance );
+void beginOutsideCode();
+void doInclude( const InputLoc &loc, char *sectionName, char *inputFile );
+int yylex( YYSTYPE *yylval, YYLTYPE *yylloc );
+
+bool sectionOpened;
+void openSection();
+
+#define WO_NOEND 0x01
+
+%}
+
+%pure-parser
+
+%union {
+ /* General data types. */
+ char c;
+ TokenData data;
+ int integer;
+ Literal *literal;
+
+ /* Tree nodes. */
+ Term *term;
+ FactorWithAug *factorWithAug;
+ FactorWithRep *factorWithRep;
+ FactorWithNeg *factorWithNeg;
+ Factor *factor;
+ Expression *expression;
+ Join *join;
+ JoinOrLm *joinOrLm;
+ LmPartList *longestMatchList;
+ LongestMatchPart *longestMatchPart;
+
+ /* Priorities and actions. */
+ AugType augType;
+ StateAugType stateAugType;
+ Action *action;
+ PriorDesc *priorDesc;
+
+ /* Regular expression items. */
+ RegExpr *regExp;
+ ReItem *reItem;
+ ReOrBlock *reOrBlock;
+ ReOrItem *reOrItem;
+
+ /* Inline parse tree items. */
+ InlineItem *ilitem;
+ InlineList *illist;
+}
+
+%token TK_Section
+%token TK_SectionNL
+
+/* General tokens. */
+%token <data> TK_UInt
+%token <data> TK_Hex
+%token <data> TK_Word
+%token <data> TK_Literal
+%token <data> TK_CiLiteral
+%token <data> TK_BaseClause
+%token TK_DotDot
+%token TK_ColonGt
+%token TK_ColonGtGt
+%token TK_LtColon
+%token TK_Arrow
+%token TK_DoubleArrow
+%token TK_StarStar
+%token TK_ColonEquals
+%token TK_NameSep
+%token TK_BarStar
+%token TK_RepOpOpen
+%token TK_DashDash
+
+%token TK_StartCond
+%token TK_AllCond
+%token TK_LeavingCond
+
+%token TK_Middle
+
+/* Global error actions. */
+%token TK_StartGblError
+%token TK_AllGblError
+%token TK_FinalGblError
+%token TK_NotFinalGblError
+%token TK_NotStartGblError
+%token TK_MiddleGblError
+
+/* Local error actions. */
+%token TK_StartLocalError
+%token TK_AllLocalError
+%token TK_FinalLocalError
+%token TK_NotFinalLocalError
+%token TK_NotStartLocalError
+%token TK_MiddleLocalError
+
+/* EOF Action embedding. */
+%token TK_StartEOF
+%token TK_AllEOF
+%token TK_FinalEOF
+%token TK_NotFinalEOF
+%token TK_NotStartEOF
+%token TK_MiddleEOF
+
+/* To State Actions. */
+%token TK_StartToState
+%token TK_AllToState
+%token TK_FinalToState
+%token TK_NotFinalToState
+%token TK_NotStartToState
+%token TK_MiddleToState
+
+/* In State Actions. */
+%token TK_StartFromState
+%token TK_AllFromState
+%token TK_FinalFromState
+%token TK_NotFinalFromState
+%token TK_NotStartFromState
+%token TK_MiddleFromState
+
+/* Regular expression tokens. */
+%token <data> RE_Slash
+%token RE_SqOpen
+%token RE_SqOpenNeg
+%token RE_SqClose
+%token RE_Dot
+%token RE_Star
+%token RE_Dash
+%token <data> RE_Char
+
+/* Tokens specific to inline code. */
+%token <data> IL_WhiteSpace
+%token <data> IL_Comment
+%token <data> IL_Literal
+%token <data> IL_Symbol
+
+/* Keywords. */
+%token KW_Action
+%token KW_AlphType
+%token KW_Range
+%token KW_GetKey
+%token KW_Include
+%token KW_Write
+%token KW_Machine
+%token KW_When
+%token KW_Eof
+%token KW_Err
+%token KW_Lerr
+%token KW_To
+%token KW_From
+
+/* Specials in code blocks. Each token is declared exactly once; KW_Exec was
+ * previously listed a second time after KW_Next. */
+%token KW_Break
+%token KW_Exec
+%token KW_Hold
+%token KW_PChar
+%token KW_Char
+%token KW_Goto
+%token KW_Call
+%token KW_Ret
+%token KW_CurState
+%token KW_TargState
+%token KW_Entry
+%token KW_Next
+%token<data> KW_Variable
+%token KW_Access
+
+/* Special token for terminating semi-terminated code blocks. Needed because
+ * semi is sent as a token in the code block rather than as a generic symbol. */
+%token TK_Semi
+
+/* Symbols. In ragel lexical space, the scanner does not pass
+ * any data along with the symbols, in inline code lexical
+ * space it does. */
+%token '*' '?' '+' '!' '^' '(' ')' ';' ',' '='
+%token ':' '@' '%' '$' '-' '|' '&' '.' '>'
+
+/* Precedence information. Lower is a higher precedence. We need only two
+ * precedence groups. Shifting the minus sign in front of a literal number
+ * conflicts with the reduction of Expression and the subsequent shifting of a
+ * subtraction operator when a '-' is seen. Since we want subtraction to take
+ * precedence, we give EXPR_MINUS the higher priority. */
+%nonassoc '-'
+%nonassoc EXPR_MINUS
+
+%type <augType> AugTypeBase
+%type <augType> AugTypeGblError
+%type <augType> AugTypeLocalError
+%type <augType> AugTypeEOF
+%type <augType> AugTypeToState
+%type <augType> AugTypeFromState
+%type <augType> AugTypeCond
+%type <integer> PriorityAug
+%type <data> PriorityAugNum
+%type <action> ActionEmbed
+%type <action> ActionEmbedWord
+%type <action> ActionEmbedBlock
+%type <action> OptLmPartAction
+%type <longestMatchList> LmPartList
+%type <longestMatchPart> LongestMatchPart
+%type <join> Join
+%type <joinOrLm> JoinOrLm
+%type <expression> Expression
+%type <term> Term
+%type <factorWithAug> FactorWithLabel
+%type <factorWithAug> FactorWithEp
+%type <factorWithAug> FactorWithAug
+%type <factorWithAug> FactorWithTransAction
+%type <factorWithAug> FactorWithPriority
+%type <factorWithAug> FactorWithCond
+%type <factorWithAug> FactorWithToStateAction
+%type <factorWithAug> FactorWithFromStateAction
+%type <factorWithAug> FactorWithEOFAction
+%type <factorWithAug> FactorWithGblErrorAction
+%type <factorWithAug> FactorWithLocalErrorAction
+%type <factorWithRep> FactorWithRep
+%type <integer> FactorRepNum
+%type <factorWithNeg> FactorWithNeg
+%type <factor> Factor
+%type <literal> RangeLit
+%type <data> AlphabetNum
+%type <data> MachineName
+%type <integer> PriorityName
+%type <integer> LocalErrName
+%type <data> SectionName
+%type <data> OptSection
+%type <data> OptFileName
+%type <integer> EndSection
+
+%type <illist> InlineBlock
+%type <ilitem> InlineBlockItem
+%type <ilitem> InlineBlockInterpret
+%type <data> InlineBlockAny
+%type <data> InlineBlockSymbol
+
+%type <illist> InlineExpr
+%type <ilitem> InlineExprItem
+%type <ilitem> InlineExprInterpret
+%type <data> InlineExprSymbol
+%type <data> InlineExprAny
+
+%type <regExp> RegularExpr
+%type <reItem> RegularExprItem
+%type <reItem> RegularExprChar
+%type <reOrBlock> RegularExprOrData
+%type <reOrItem> RegularExprOrChar
+
+%%
+
+/* Input is any number of input sections. An empty file is accepted. */
+input: FsmSpecList;
+FsmSpecList:
+ FsmSpecList FsmSpec |
+ /* Nothing */;
+
+/* Fsm Specification. Fsm specifications begin with '%%' and may be a {}
+ * delimited list of Fsm statements or may be a single statement. If no name
+ * is given the last name given in a machine is used. */
+FsmSpec:
+ StartSection SectionName StatementList EndSection {
+ /* Output is only produced for the top-level file, not for includes. */
+ if ( includeDepth == 0 ) {
+ if ( sectionOpened )
+ *outStream << "</ragel_def>\n";
+
+ if ( machineSpec == 0 && machineName == 0 ) {
+ /* The end section may include a newline on the end, so
+ * we use the last line, which will count the newline. */
+ *outStream << "<host line=\"" << $4 << "\">";
+ }
+ }
+ };
+
+/* Opening marker of a section. Records where the section starts and, when
+ * not inside an include, resets sectionOpened. The "</host>" close tag is
+ * written only when neither machineSpec nor machineName is set. */
+StartSection:
+ TK_Section {
+ id->sectionLoc = InputLoc(@1);
+
+ if ( includeDepth == 0 ) {
+ if ( machineSpec == 0 && machineName == 0 )
+ *outStream << "</host>\n";
+ sectionOpened = false;
+ }
+ };
+
+/* Optional "machine <name>;" statement at the head of a section. Selects
+ * (creating on first use) the ParseData that the section's statements are
+ * applied to. */
+SectionName:
+ KW_Machine TK_Word ';' {
+ /* By default active until found not active. */
+ id->active = true;
+ id->sectionName = $2.data;
+
+ /* With an include spec set, only the section whose name matches the
+ * spec stays active, and it is renamed to includeTo. Any other section
+ * is marked inactive. */
+ if ( id->includeSpec != 0 ) {
+ if ( strcmp( id->sectionName, id->includeSpec ) == 0 )
+ id->sectionName = id->includeTo;
+ else
+ id->active = false;
+ }
+
+ /* Lookup the parse data, if it is not there then create it. */
+ SectionMapEl *sectionMapEl = sectionMap.find( id->sectionName );
+ if ( sectionMapEl == 0 ) {
+ ParseData *newPd = new ParseData( id->fileName, id->sectionName,
+ id->sectionLoc );
+ sectionMapEl = sectionMap.insert( id->sectionName, newPd );
+ }
+ id->pd = sectionMapEl->value;
+ } |
+ /* Empty */ {
+ /* No machine name. Just use the previous section setup. Report an
+ * error if there is no previous section. A placeholder ParseData is
+ * installed so that later actions have a non-null id->pd. */
+ if ( id->pd == 0 ) {
+ error(id->sectionLoc) << "the first ragel section does not have a name" << endl;
+ id->pd = new ParseData( id->fileName, "<DUMMY>", id->sectionLoc );
+ }
+ };
+
+/* Closing marker of a section. The value is a line number: the last line of
+ * the marker, plus one when the marker is the TK_SectionNL token. */
+EndSection:
+ TK_Section { $$ = @1.last_line; } |
+ TK_SectionNL { $$ = @1.last_line + 1; };
+
+/* A possibly empty list of statements in a fsm. */
+StatementList:
+ StatementList Statement |
+ /* Nothing */;
+
+/* The different types of statements in a fsm spec. */
+Statement:
+ Assignment |
+ Instantiation |
+ ActionSpec |
+ AlphSpec |
+ GetKeySpec |
+ RangeSpec |
+ Include |
+ Write |
+ Access |
+ Variable;
+
+/* Garble up to the next ; */
+Statement: error ';' { yyerrok; };
+
+/* Allow the user to create a named fsm action that can be referenced when
+ * building a machine. */
+ActionSpec:
+ KW_Action TK_Word '{' InlineBlock '}' {
+ if ( id->active ) {
+ if ( id->pd->actionDict.find( $2.data ) ) {
+ /* Recover by just ignoring the duplicate. */
+ error(@2) << "action \"" << $2.data << "\" already defined" << endl;
+ }
+ else {
+ /* Add the action to the list of actions. */
+ Action *newAction = new Action( InputLoc(@3), $2.data, $4, id->nameRefList );
+
+ /* Insert to list and dict. */
+ id->pd->actionList.append( newAction );
+ id->pd->actionDict.insert( newAction );
+ }
+ }
+ };
+
+/* Specifies the data type of the input alphabet. One or two words
+ * followed by a semi-colon. */
+AlphSpec:
+ KW_AlphType TK_Word TK_Word TK_Semi {
+ if ( id->active ) {
+ if ( ! id->pd->setAlphType( $2.data, $3.data ) ) {
+ // Recover by ignoring the alphtype statement.
+ error(@2) << "\"" << $2.data <<
+ " " << $3.data << "\" is not a valid alphabet type" << endl;
+ }
+ }
+ } |
+ KW_AlphType TK_Word TK_Semi {
+ if ( id->active ) {
+ if ( ! id->pd->setAlphType( $2.data ) ) {
+ // Recover by ignoring the alphtype statement.
+ error(@2) << "\"" << $2.data << "\" is not a valid alphabet type" << endl;
+ }
+ }
+ };
+
+GetKeySpec:
+ KW_GetKey InlineBlock TK_Semi {
+ if ( id->active )
+ id->pd->getKeyExpr = $2;
+ };
+
+/* Specifies a range to assume that the input characters will fall into. */
+RangeSpec:
+ KW_Range AlphabetNum AlphabetNum ';' {
+ if ( id->active ) {
+ // Save the upper and lower ends of the range and emit the line number.
+ id->pd->lowerNum = $2.data;
+ id->pd->upperNum = $3.data;
+ id->pd->rangeLowLoc = InputLoc(@2);
+ id->pd->rangeHighLoc = InputLoc(@3);
+ }
+ };
+
+
+Write:
+ WriteOpen WriteOptions ';' {
+ if ( id->active )
+ *outStream << "</write>\n";
+ };
+
+WriteOpen:
+ KW_Write TK_Word {
+ if ( id->active ) {
+ openSection();
+ if ( strcmp( $2.data, "data" ) != 0 &&
+ strcmp( $2.data, "init" ) != 0 &&
+ strcmp( $2.data, "exec" ) != 0 &&
+ strcmp( $2.data, "eof" ) != 0 )
+ {
+ error( @2 ) << "unknown write command" << endl;
+ }
+ *outStream << " <write what=\"" << $2.data << "\">";
+ }
+ };
+
+WriteOptions:
+ WriteOptions TK_Word {
+ if ( id->active )
+ *outStream << "<option>" << $2.data << "</option>";
+ } |
+ /* Nothing */;
+
+Access:
+ KW_Access InlineBlock TK_Semi {
+ if ( id->active )
+ id->pd->accessExpr = $2;
+ };
+
+/* A variable statement. Only the variable named "curstate" is acted on by
+ * this action: its expression is stored as the parse data's curStateExpr.
+ * Any other variable name is accepted by the grammar and ignored here. */
+Variable:
+ KW_Variable InlineBlock TK_Semi {
+ if ( id->active ) {
+ if ( strcmp( $1.data, "curstate" ) == 0 ) {
+ id->pd->curStateExpr = $2;
+ }
+ }
+ };
+
+/* Include statements are processed by both the scanner and the parser. */
+Include:
+ IncludeKeyword OptSection OptFileName ';' {
+ if ( id->active )
+ doInclude( @1, $2.data, $3.data );
+ };
+
+IncludeKeyword:
+ KW_Include {
+ /* Do this immediately so that the scanner has a correct sense of the
+ * value in id->active when it reaches the end of the statement before
+ * the above action executes. */
+ //getParseData( @1 );
+ };
+
+OptSection: TK_Word { $$ = $1; } | { $$.data = 0; $$.length = 0; };
+OptFileName: TK_Literal { $$ = $1; } | { $$.data = 0; $$.length = 0; };
+
+/* An assignment statement. Assigns the definition of a machine to a variable
+ * name. Nothing is defined while the section is inactive. */
+Assignment:
+ MachineName '=' Join ';' {
+ if ( id->active ) {
+ /* Main machine must be an instance. An assignment to the name in
+ * machineMain is therefore turned into an instantiation, with a
+ * warning. */
+ bool isInstance = false;
+ if ( strcmp($1.data, machineMain) == 0 ) {
+ warning(@1) << "main machine will be implicitly instantiated" << endl;
+ isInstance = true;
+ }
+
+ /* Generic creation of machine for instantiation and assignment. */
+ JoinOrLm *joinOrLm = new JoinOrLm( $3 );
+ tryMachineDef( @1, $1.data, joinOrLm, isInstance );
+ }
+ };
+
+/* An instantiation statement. Instantiates a machine and assigns it to a
+ * variable name. */
+Instantiation:
+ MachineName TK_ColonEquals JoinOrLm ';' {
+ /* Generic creation of machine for instantiation and assignment. */
+ if ( id->active )
+ tryMachineDef( @1, $1.data, $3, true );
+ };
+
+/* Capture the machine name for making the machine's priority name. */
+MachineName:
+ TK_Word {
+ if ( id->active ) {
+ /* Make/get the priority key. The name may have already been referenced
+ * and therefore exist. */
+ PriorDictEl *priorDictEl;
+ if ( id->pd->priorDict.insert( $1.data, id->pd->nextPriorKey, &priorDictEl ) )
+ id->pd->nextPriorKey += 1;
+ id->pd->curDefPriorKey = priorDictEl->value;
+
+ /* Make/get the local error key. */
+ LocalErrDictEl *localErrDictEl;
+ if ( id->pd->localErrDict.insert( $1.data, id->pd->nextLocalErrKey, &localErrDictEl ) )
+ id->pd->nextLocalErrKey += 1;
+ id->pd->curDefLocalErrKey = localErrDictEl->value;
+ }
+ };
+
+/* The right-hand side of an instantiation: either a plain join or a longest
+ * match construct delimited by TK_BarStar and '*' '|'. */
+JoinOrLm:
+ Join {
+ $$ = new JoinOrLm( $1 );
+ } |
+ TK_BarStar LmPartList '*' '|' {
+ /* Create a new factor going to a longest match structure. Record
+ * in the parse data that we have a longest match. */
+ LongestMatch *lm = new LongestMatch( @1, $2 );
+ if ( id->active )
+ id->pd->lmList.append( lm );
+ /* Point every part of the list back at its owning longest match. */
+ for ( LmPartList::Iter lmp = *($2); lmp.lte(); lmp++ )
+ lmp->longestMatch = lm;
+ $$ = new JoinOrLm( lm );
+ };
+
+Join:
+ Join ',' Expression {
+ /* Append the expression to the list and return it. */
+ $1->exprList.append( $3 );
+ $$ = $1;
+ } |
+ Expression {
+ /* Create the expression list with the intial expression. */
+ $$ = new Join( InputLoc(@1), $1 );
+ };
+
+/* Top level production in the parse of a fsm. The lowest precedence
+ * is the '|' (or), '&' (intersection), and '-' (subtraction) operators. */
+Expression:
+ Expression '|' Term {
+ $$ = new Expression( $1, $3, Expression::OrType );
+ } %prec EXPR_MINUS |
+ Expression '&' Term {
+ $$ = new Expression( $1, $3, Expression::IntersectType );
+ } %prec EXPR_MINUS |
+ Expression '-' Term {
+ $$ = new Expression( $1, $3, Expression::SubtractType );
+ } %prec EXPR_MINUS |
+ Expression TK_DashDash Term {
+ $$ = new Expression( $1, $3, Expression::StrongSubtractType );
+ } %prec EXPR_MINUS |
+ Term {
+ $$ = new Expression( $1 );
+ } %prec EXPR_MINUS;
+
+Term:
+ Term FactorWithLabel {
+ $$ = new Term( $1, $2 );
+ } |
+ Term '.' FactorWithLabel {
+ $$ = new Term( $1, $3 );
+ } |
+ Term TK_ColonGt FactorWithLabel {
+ $$ = new Term( $1, $3, Term::RightStartType );
+ } |
+ Term TK_ColonGtGt FactorWithLabel {
+ $$ = new Term( $1, $3, Term::RightFinishType );
+ } |
+ Term TK_LtColon FactorWithLabel {
+ $$ = new Term( $1, $3, Term::LeftType );
+ } |
+ FactorWithLabel {
+ $$ = new Term( $1 );
+ };
+
+FactorWithLabel:
+ TK_Word ':' FactorWithLabel {
+ /* Add the label to the list and pass the factor up. */
+ $3->labels.prepend( Label(@1, $1.data) );
+ $$ = $3;
+ } |
+ FactorWithEp;
+
+FactorWithEp:
+ FactorWithEp TK_Arrow LocalStateRef {
+ /* Add the target to the list and return the factor object. */
+ $1->epsilonLinks.append( EpsilonLink( InputLoc(@2), id->nameRef ) );
+ $$ = $1;
+ } |
+ FactorWithAug;
+
+/* A local state reference. Qualified name without :: prefix. */
+LocalStateRef:
+ NoNameSep StateRefNames;
+
+/* Clear the name ref structure. */
+NoNameSep:
+ /* Nothing */ {
+ id->nameRef.empty();
+ };
+
+/* A qualified state reference. */
+StateRef:
+ OptNameSep StateRefNames;
+
+/* Optional leading name separator. */
+OptNameSep:
+ TK_NameSep {
+ /* Insert an initial null pointer val to indicate the existence of the
+ * initial name separator. */
+ id->nameRef.setAs( 0 );
+ } |
+ /* Nothing. */ {
+ /* No leading separator given. */
+ id->nameRef.empty();
+ };
+
+/* List of names separated by :: */
+StateRefNames:
+ StateRefNames TK_NameSep TK_Word {
+ id->nameRef.append( $3.data );
+ } |
+ TK_Word {
+ id->nameRef.append( $1.data );
+ };
+
+/* Third group up in precedence. Allow users to embed actions and priorities */
+FactorWithAug:
+ FactorWithTransAction |
+ FactorWithPriority |
+ FactorWithCond |
+ FactorWithToStateAction |
+ FactorWithFromStateAction |
+ FactorWithEOFAction |
+ FactorWithGblErrorAction |
+ FactorWithLocalErrorAction |
+ FactorWithRep {
+ $$ = new FactorWithAug( $1 );
+ };
+
+FactorWithTransAction:
+ FactorWithAug AugTypeBase ActionEmbed {
+ /* Append the action to the factorWithAug, record the refernce from
+ * factorWithAug to the action and pass up the factorWithAug. */
+ $1->actions.append( ParserAction( @2, $2, 0, $3 ) );
+ $$ = $1;
+ };
+
+/* Priority embedding. The priority is recorded either under the key of the
+ * machine currently being defined or under an explicitly named key. */
+FactorWithPriority:
+ FactorWithAug AugTypeBase PriorityAug {
+ if ( id->active ) {
+ /* No name given: append the priority under the current definition's
+ * priority key (curDefPriorKey) and pass the factorWithAug up. */
+ $1->priorityAugs.append( PriorityAug( $2, id->pd->curDefPriorKey, $3 ) );
+ }
+ $$ = $1;
+ } |
+ FactorWithAug AugTypeBase '(' PriorityName ',' PriorityAug ')' {
+ /* Append the priority under the explicitly named key. */
+ $1->priorityAugs.append( PriorityAug( $2, $4, $6 ) );
+ $$ = $1;
+ };
+
+/* Condition embedding: attach the action as a condition of the factor. */
+FactorWithCond:
+	FactorWithAug AugTypeCond ActionEmbed {
+		/* Append to $1 explicitly. Going through $$ before it is assigned
+		 * relies on the parser pre-loading $$ with $1, which is an
+		 * implementation detail and unlike every sibling rule. */
+		$1->conditions.append( ParserAction( @2, $2, 0, $3 ) );
+		$$ = $1;
+	};
+
+/* Classes of transitions on which a condition can be embedded. Each class
+ * has a single-token form and an operator + 'when' form, and both forms of
+ * a class yield the same type. A bare 'when' means all transitions. */
+AugTypeCond:
+	TK_StartCond { $$ = at_start; } |
+	'>' KW_When { $$ = at_start; } |
+	TK_AllCond { $$ = at_all; } |
+	'$' KW_When { $$ = at_all; } |
+	TK_LeavingCond { $$ = at_leave; } |
+	/* Leaving form: must agree with TK_LeavingCond ('%' is at_leave in
+	 * AugTypeBase as well). */
+	'%' KW_When { $$ = at_leave; } |
+	KW_When { $$ = at_all; };
+
+FactorWithToStateAction:
+ FactorWithAug AugTypeToState ActionEmbed {
+ /* Append the action, pass it up. */
+ $1->actions.append( ParserAction( @2, $2, 0, $3 ) );
+ $$ = $1;
+ };
+
+FactorWithFromStateAction:
+ FactorWithAug AugTypeFromState ActionEmbed {
+ /* Append the action, pass it up. */
+ $1->actions.append( ParserAction( @2, $2, 0, $3 ) );
+ $$ = $1;
+ };
+
+FactorWithEOFAction:
+ FactorWithAug AugTypeEOF ActionEmbed {
+ /* Append the action, pass it up. */
+ $1->actions.append( ParserAction( @2, $2, 0, $3 ) );
+ $$ = $1;
+ };
+
+FactorWithGblErrorAction:
+ FactorWithAug AugTypeGblError ActionEmbed {
+ if ( id->active ) {
+ /* Append the action to the factorWithAug, record the refernce from
+ * factorWithAug to the action and pass up the factorWithAug. */
+ $1->actions.append( ParserAction( @2, $2, id->pd->curDefLocalErrKey, $3 ) );
+ }
+ $$ = $1;
+ };
+
+FactorWithLocalErrorAction:
+ FactorWithAug AugTypeLocalError ActionEmbed {
+ if ( id->active ) {
+ /* Append the action to the factorWithAug, record the refernce from
+ * factorWithAug to the action and pass up the factorWithAug. */
+ $1->actions.append( ParserAction( @2, $2, id->pd->curDefLocalErrKey, $3 ) );
+ }
+ $$ = $1;
+ } |
+ FactorWithAug AugTypeLocalError '(' LocalErrName ',' ActionEmbed ')' {
+ /* Append the action to the factorWithAug, record the refernce from
+ * factorWithAug to the action and pass up the factorWithAug. */
+ $1->actions.append( ParserAction( @2, $2, $4, $6 ) );
+ $$ = $1;
+ };
+
+/* A specified priority name. Looks up the name in the current priority
+ * dictionary, creating a new key on first use. Yields the key, or 0 when
+ * the section is inactive and no dictionary lookup is done. */
+PriorityName:
+	TK_Word {
+		/* Always give the rule a defined value: the result is consumed by
+		 * FactorWithPriority regardless of id->active. */
+		$$ = 0;
+		if ( id->active ) {
+			// Lookup/create the priority key.
+			PriorDictEl *priorDictEl;
+			if ( id->pd->priorDict.insert( $1.data, id->pd->nextPriorKey, &priorDictEl ) )
+				id->pd->nextPriorKey += 1;
+
+			// Use the inserted/found priority key.
+			$$ = priorDictEl->value;
+		}
+	};
+
+/* A specified local error name. Looks up the name in the local error
+ * dictionary, creating a new key on first use. Yields the key, or 0 when
+ * the section is inactive and no dictionary lookup is done. */
+LocalErrName:
+	TK_Word {
+		/* Always give the rule a defined value: the result is consumed by
+		 * FactorWithLocalErrorAction regardless of id->active. */
+		$$ = 0;
+		if ( id->active ) {
+			/* Lookup/create the local error key. */
+			LocalErrDictEl *localErrDictEl;
+			if ( id->pd->localErrDict.insert( $1.data, id->pd->nextLocalErrKey, &localErrDictEl ) )
+				id->pd->nextLocalErrKey += 1;
+
+			/* Use the inserted/found local error key. */
+			$$ = localErrDictEl->value;
+		}
+	};
+
+/* Priority change specs. Converts the (optionally signed) decimal text to an
+ * int. A value that does not fit is reported and replaced by 0. */
+PriorityAug:
+	PriorityAugNum {
+		/* Keep the strtol result in a long: stored in an int, it could never
+		 * compare equal to LONG_MAX/LONG_MIN where long is wider than int,
+		 * and values outside the int range would be silently truncated. */
+		errno = 0;
+		long aug = strtol( $1.data, 0, 10 );
+		if ( ( errno == ERANGE && aug == LONG_MAX ) || aug > INT_MAX ) {
+			// Priority number too large. Recover by setting the priority to 0.
+			error(@1) << "priority number " << $1.data << " overflows" << endl;
+			$$ = 0;
+		}
+		else if ( ( errno == ERANGE && aug == LONG_MIN ) || aug < INT_MIN ) {
+			// Priority number too large in the neg. Recover by using 0.
+			error(@1) << "priority number " << $1.data << " underflows" << endl;
+			$$ = 0;
+		}
+		else {
+			// No overflow or underflow: the value fits in an int.
+			$$ = (int) aug;
+		}
+	};
+
+PriorityAugNum:
+ TK_UInt |
+ '+' TK_UInt {
+ $$ = $2;
+ } |
+ '-' TK_UInt {
+ $$.data = "-";
+ $$.length = 1;
+ $$.append( $2 );
+ };
+
+/* Classes of transitions on which to embed actions or change priorities. */
+AugTypeBase:
+ '@' { $$ = at_finish; } |
+ '%' { $$ = at_leave; } |
+ '$' { $$ = at_all; } |
+ '>' { $$ = at_start; };
+
+/* Global error actions. */
+AugTypeGblError:
+ TK_StartGblError { $$ = at_start_gbl_error; } |
+ '>' KW_Err { $$ = at_start_gbl_error; } |
+
+ TK_NotStartGblError { $$ = at_not_start_gbl_error; } |
+ '<' KW_Err { $$ = at_not_start_gbl_error; } |
+
+ TK_AllGblError { $$ = at_all_gbl_error; } |
+ '$' KW_Err { $$ = at_all_gbl_error; } |
+
+ TK_FinalGblError { $$ = at_final_gbl_error; } |
+ '%' KW_Err { $$ = at_final_gbl_error; } |
+
+ TK_NotFinalGblError { $$ = at_not_final_gbl_error; } |
+ '@' KW_Err { $$ = at_not_final_gbl_error; } |
+
+ TK_MiddleGblError { $$ = at_middle_gbl_error; } |
+ TK_Middle KW_Err { $$ = at_middle_gbl_error; };
+
+/* Local error actions. */
+AugTypeLocalError:
+ TK_StartLocalError { $$ = at_start_local_error; } |
+ '>' KW_Lerr { $$ = at_start_local_error; } |
+
+ TK_NotStartLocalError { $$ = at_not_start_local_error; } |
+ '<' KW_Lerr { $$ = at_not_start_local_error; } |
+
+ TK_AllLocalError { $$ = at_all_local_error; } |
+ '$' KW_Lerr { $$ = at_all_local_error; } |
+
+ TK_FinalLocalError { $$ = at_final_local_error; } |
+ '%' KW_Lerr { $$ = at_final_local_error; } |
+
+ TK_NotFinalLocalError { $$ = at_not_final_local_error; } |
+ '@' KW_Lerr { $$ = at_not_final_local_error; } |
+
+ TK_MiddleLocalError { $$ = at_middle_local_error; } |
+ TK_Middle KW_Lerr { $$ = at_middle_local_error; };
+
+/* Eof state actions. */
+AugTypeEOF:
+ TK_StartEOF { $$ = at_start_eof; } |
+ '>' KW_Eof { $$ = at_start_eof; } |
+
+ TK_NotStartEOF { $$ = at_not_start_eof; } |
+ '<' KW_Eof { $$ = at_not_start_eof; } |
+
+ TK_AllEOF { $$ = at_all_eof; } |
+ '$' KW_Eof { $$ = at_all_eof; } |
+
+ TK_FinalEOF { $$ = at_final_eof; } |
+ '%' KW_Eof { $$ = at_final_eof; } |
+
+ TK_NotFinalEOF { $$ = at_not_final_eof; } |
+ '@' KW_Eof { $$ = at_not_final_eof; } |
+
+ TK_MiddleEOF { $$ = at_middle_eof; } |
+ TK_Middle KW_Eof { $$ = at_middle_eof; };
+
+/* To state actions. */
+AugTypeToState:
+ TK_StartToState { $$ = at_start_to_state; } |
+ '>' KW_To { $$ = at_start_to_state; } |
+
+ TK_NotStartToState { $$ = at_not_start_to_state; } |
+ '<' KW_To { $$ = at_not_start_to_state; } |
+
+ TK_AllToState { $$ = at_all_to_state; } |
+ '$' KW_To { $$ = at_all_to_state; } |
+
+ TK_FinalToState { $$ = at_final_to_state; } |
+ '%' KW_To { $$ = at_final_to_state; } |
+
+ TK_NotFinalToState { $$ = at_not_final_to_state; } |
+ '@' KW_To { $$ = at_not_final_to_state; } |
+
+ TK_MiddleToState { $$ = at_middle_to_state; } |
+ TK_Middle KW_To { $$ = at_middle_to_state; };
+
+/* From state actions. */
+AugTypeFromState:
+ TK_StartFromState { $$ = at_start_from_state; } |
+ '>' KW_From { $$ = at_start_from_state; } |
+
+ TK_NotStartFromState { $$ = at_not_start_from_state; } |
+ '<' KW_From { $$ = at_not_start_from_state; } |
+
+ TK_AllFromState { $$ = at_all_from_state; } |
+ '$' KW_From { $$ = at_all_from_state; } |
+
+ TK_FinalFromState { $$ = at_final_from_state; } |
+ '%' KW_From { $$ = at_final_from_state; } |
+
+ TK_NotFinalFromState { $$ = at_not_final_from_state; } |
+ '@' KW_From { $$ = at_not_final_from_state; } |
+
+ TK_MiddleFromState { $$ = at_middle_from_state; } |
+ TK_Middle KW_From { $$ = at_middle_from_state; };
+
+
+/* Different ways to embed actions. A TK_Word is reference to an action given by
+ * the user as a statement in the fsm specification. An action can also be
+ * specified immediately. */
+ActionEmbed:
+ ActionEmbedWord | ActionEmbedBlock;
+
+/* An embedded action given by name. Yields the action found in the action
+ * dictionary, or null when the lookup fails or the section is inactive. */
+ActionEmbedWord:
+	TK_Word {
+		/* Null unless the lookup below succeeds. This also gives the rule a
+		 * defined value in inactive sections, where no lookup is done. */
+		$$ = 0;
+		if ( id->active ) {
+			/* Find the name in the actionDict. */
+			Action *action = id->pd->actionDict.find( $1.data );
+			if ( action != 0 ) {
+				/* Pass up the action element */
+				$$ = action;
+			}
+			else {
+				/* Will recover by returning null as the action. */
+				error(@1) << "action lookup of \"" << $1.data << "\" failed" << endl;
+			}
+		}
+	};
+
+/* An embedded action given inline as a code block. Yields the new unnamed
+ * action, or null when the section is inactive. */
+ActionEmbedBlock:
+	'{' InlineBlock '}' {
+		/* Give the rule a defined value in inactive sections too; null is
+		 * the same value ActionEmbedWord uses for "no action". */
+		$$ = 0;
+		if ( id->active ) {
+			/* Create the action, add it to the list and pass up. */
+			Action *newAction = new Action( InputLoc(@1), 0, $2, id->nameRefList );
+			id->pd->actionList.append( newAction );
+			$$ = newAction;
+		}
+	};
+
+/* The fourth level of precedence. These are the trailing unary operators that
+ * allow for repetition. */
+FactorWithRep:
+ FactorWithRep '*' {
+ $$ = new FactorWithRep( InputLoc(@2), $1, 0, 0,
+ FactorWithRep::StarType );
+ } |
+ FactorWithRep TK_StarStar {
+ $$ = new FactorWithRep( InputLoc(@2), $1, 0, 0,
+ FactorWithRep::StarStarType );
+ } |
+ FactorWithRep '?' {
+ $$ = new FactorWithRep( InputLoc(@2), $1, 0, 0,
+ FactorWithRep::OptionalType );
+ } |
+ FactorWithRep '+' {
+ $$ = new FactorWithRep( InputLoc(@2), $1, 0, 0,
+ FactorWithRep::PlusType );
+ } |
+ FactorWithRep TK_RepOpOpen FactorRepNum '}' {
+ $$ = new FactorWithRep( InputLoc(@2), $1, $3, 0,
+ FactorWithRep::ExactType );
+ } |
+ FactorWithRep TK_RepOpOpen ',' FactorRepNum '}' {
+ $$ = new FactorWithRep( InputLoc(@2), $1, 0, $4,
+ FactorWithRep::MaxType );
+ } |
+ FactorWithRep TK_RepOpOpen FactorRepNum ',' '}' {
+ $$ = new FactorWithRep( InputLoc(@2), $1, $3, 0,
+ FactorWithRep::MinType );
+ } |
+ FactorWithRep TK_RepOpOpen FactorRepNum ',' FactorRepNum '}' {
+ $$ = new FactorWithRep( InputLoc(@2), $1, $3, $5,
+ FactorWithRep::RangeType );
+ } |
+ FactorWithNeg {
+ $$ = new FactorWithRep( InputLoc(@1), $1 );
+ };
+
+/* A repetition count. Converts the unsigned decimal text to an int. A value
+ * that does not fit is reported and replaced by 1. */
+FactorRepNum:
+	TK_UInt {
+		/* Keep the strtol result in a long: stored in an int, it could never
+		 * compare equal to LONG_MAX where long is wider than int, and counts
+		 * above INT_MAX would be silently truncated. */
+		errno = 0;
+		long rep = strtol( $1.data, 0, 10 );
+		if ( ( errno == ERANGE && rep == LONG_MAX ) || rep > INT_MAX ) {
+			// Repetition too large. Recover by returning repetition 1.
+			error(@1) << "repetition number " << $1.data << " overflows" << endl;
+			$$ = 1;
+		}
+		else {
+			// Cannot be negative, so no underflow.
+			$$ = (int) rep;
+		}
+	};
+
+/* The fifth level up in precedence. Negation. */
+FactorWithNeg:
+ '!' FactorWithNeg {
+ $$ = new FactorWithNeg( InputLoc(@1), $2, FactorWithNeg::NegateType );
+ } |
+ '^' FactorWithNeg {
+ $$ = new FactorWithNeg( InputLoc(@1), $2, FactorWithNeg::CharNegateType );
+ } |
+ Factor {
+ $$ = new FactorWithNeg( InputLoc(@1), $1 );
+ };
+
+/* The highest level in precedence. Atomic machines such as references to other
+ * machines, literal machines, regular expressions or Expressions inside of
+ * parentheses. */
+Factor:
+	TK_Literal {
+		// Create a new factor node going to a concat literal.
+		$$ = new Factor( new Literal( InputLoc(@1), $1, Literal::LitString ) );
+	} |
+	TK_CiLiteral {
+		// Create a new factor node going to a case-insensitive concat literal.
+		$$ = new Factor( new Literal( InputLoc(@1), $1, Literal::LitString ) );
+		$$->literal->caseInsensitive = true;
+	} |
+	AlphabetNum {
+		// Create a new factor node going to a literal number.
+		$$ = new Factor( new Literal( InputLoc(@1), $1, Literal::Number ) );
+	} |
+	TK_Word {
+		/* Null unless the lookup below succeeds. This is the error-recovery
+		 * value, and it also gives the rule a defined value in inactive
+		 * sections, where no lookup is done. */
+		$$ = 0;
+		if ( id->active ) {
+			// Find the named graph.
+			GraphDictEl *gdNode = id->pd->graphDict.find( $1.data );
+			if ( gdNode == 0 ) {
+				// Recover by returning null as the factor node.
+				error(@1) << "graph lookup of \"" << $1.data << "\" failed" << endl;
+			}
+			else if ( gdNode->isInstance ) {
+				// Recover by returning null as the factor node.
+				error(@1) << "references to graph instantiations not allowed "
+						"in expressions" << endl;
+			}
+			else {
+				// Create a factor node that is a lookup of an expression.
+				$$ = new Factor( InputLoc(@1), gdNode->value );
+			}
+		}
+	} |
+	RE_SqOpen RegularExprOrData RE_SqClose {
+		// Create a new factor node going to an OR expression.
+		$$ = new Factor( new ReItem( InputLoc(@1), $2, ReItem::OrBlock ) );
+	} |
+	RE_SqOpenNeg RegularExprOrData RE_SqClose {
+		// Create a new factor node going to a negated OR expression.
+		$$ = new Factor( new ReItem( InputLoc(@1), $2, ReItem::NegOrBlock ) );
+	} |
+	RE_Slash RegularExpr RE_Slash {
+		/* Text carried by the closing slash token is scanned for an 'i',
+		 * which makes the expression case insensitive. */
+		if ( $3.length > 1 ) {
+			for ( char *p = $3.data; *p != 0; p++ ) {
+				if ( *p == 'i' )
+					$2->caseInsensitive = true;
+			}
+		}
+
+		// Create a new factor node going to a regular exp.
+		$$ = new Factor( $2 );
+	} |
+	RangeLit TK_DotDot RangeLit {
+		// Create a new factor node going to a range.
+		$$ = new Factor( new Range( $1, $3 ) );
+	} |
+	'(' Join ')' {
+		/* Create a new factor going to a parenthesized join. */
+		$$ = new Factor( $2 );
+	};
+
+/* Garble up to the closing brace of a parenthesized expression. */
+Factor: '(' error ')' { $$ = 0; yyerrok; };
+
+LmPartList:
+ LmPartList LongestMatchPart {
+ if ( $2 != 0 )
+ $1->append( $2 );
+ $$ = $1;
+ } |
+ LongestMatchPart {
+ /* Create a new list with the part. */
+ $$ = new LmPartList;
+ if ( $1 != 0 )
+ $$->append( $1 );
+ };
+
+LongestMatchPart:
+ ActionSpec { $$ = 0; } |
+ Assignment { $$ = 0; } |
+ Join OptLmPartAction ';' {
+ $$ = 0;
+ if ( id->active ) {
+ Action *action = $2;
+ if ( action != 0 )
+ action->isLmAction = true;
+ $$ = new LongestMatchPart( $1, action, id->pd->nextLongestMatchId++ );
+ }
+ };
+
+OptLmPartAction:
+ TK_DoubleArrow ActionEmbed { $$ = $2; } |
+ ActionEmbedBlock { $$ = $1; } |
+ /* Nothing */ { $$ = 0; };
+
+
+/* Any form of a number that can be used as a basic machine. */
+AlphabetNum:
+ TK_UInt |
+ '-' TK_UInt {
+ $$.data = "-";
+ $$.length = 1;
+ $$.append( $2 );
+ } |
+ TK_Hex;
+
+InlineBlock:
+ InlineBlock InlineBlockItem {
+ /* Append the item to the list, return the list. */
+ $1->append( $2 );
+ $$ = $1;
+ } |
+ /* Empty */ {
+ /* Start with empty list. */
+ $$ = new InlineList;
+ };
+
+/* Items in a struct block. */
+InlineBlockItem:
+ InlineBlockAny {
+ /* Add a text segment. */
+ $$ = new InlineItem( @1, $1.data, InlineItem::Text );
+ } |
+ InlineBlockSymbol {
+ /* Add a text segment, need string on heap. */
+ $$ = new InlineItem( @1, strdup($1.data), InlineItem::Text );
+ } |
+ InlineBlockInterpret {
+ /* Pass the inline item up. */
+ $$ = $1;
+ };
+
+/* Uninteresting tokens in a struct block. Data allocated by scanner. */
+InlineBlockAny:
+ /* No actions here: every alternative is a single token, so bison's
+  * default action ($$ = $1) forwards the token's data. */
+ IL_WhiteSpace | IL_Comment | IL_Literal | IL_Symbol |
+ TK_UInt | TK_Hex | TK_Word;
+
+/* Symbols in a struct block, no data allocated. */
+InlineBlockSymbol:
+ /* Each alternative points data at a string literal and records its
+  * character count. InlineBlockItem strdup()s the text before storing it. */
+ ',' { $$.data = ","; $$.length = 1; } |
+ ';' { $$.data = ";"; $$.length = 1; } |
+ '(' { $$.data = "("; $$.length = 1; } |
+ ')' { $$.data = ")"; $$.length = 1; } |
+ '*' { $$.data = "*"; $$.length = 1; } |
+ TK_NameSep { $$.data = "::"; $$.length = 2; };
+
+/* Interpreted statements in a struct block. */
+InlineBlockInterpret:
+ InlineExprInterpret {
+ /* Pass up interpreted items of inline expressions. */
+ $$ = $1;
+ } |
+ /* Statement forms. SetNoWs and SetWs are empty rules that clear and set
+  * the inlineWhitespace flag. The state-reference forms copy id->nameRef
+  * into a new NameRef (presumably filled in by StateRef -- confirm). The
+  * '*' forms attach the parsed expression list as the item's children. */
+ KW_Hold SetNoWs ';' SetWs {
+ $$ = new InlineItem( @1, InlineItem::Hold );
+ } |
+ KW_Exec SetNoWs InlineExpr ';' SetWs {
+ $$ = new InlineItem( @1, InlineItem::Exec );
+ $$->children = $3;
+ } |
+ KW_Goto SetNoWs StateRef ';' SetWs {
+ $$ = new InlineItem( @1, new NameRef(id->nameRef), InlineItem::Goto );
+ } |
+ KW_Goto SetNoWs '*' SetWs InlineExpr ';' {
+ $$ = new InlineItem( @1, InlineItem::GotoExpr );
+ $$->children = $5;
+ } |
+ KW_Next SetNoWs StateRef ';' SetWs {
+ $$ = new InlineItem( @1, new NameRef(id->nameRef), InlineItem::Next );
+ } |
+ KW_Next SetNoWs '*' SetWs InlineExpr ';' {
+ $$ = new InlineItem( @1, InlineItem::NextExpr );
+ $$->children = $5;
+ } |
+ KW_Call SetNoWs StateRef ';' SetWs {
+ $$ = new InlineItem( @1, new NameRef(id->nameRef), InlineItem::Call );
+ } |
+ KW_Call SetNoWs '*' SetWs InlineExpr ';' {
+ $$ = new InlineItem( @1, InlineItem::CallExpr );
+ $$->children = $5;
+ } |
+ KW_Ret SetNoWs ';' SetWs {
+ $$ = new InlineItem( @1, InlineItem::Ret );
+ } |
+ KW_Break SetNoWs ';' SetWs {
+ $$ = new InlineItem( @1, InlineItem::Break );
+ };
+
+/* Turn off whitespace collecting when scanning inline blocks. This is an
+ * empty rule, used only for the side effect of its action. */
+SetNoWs: { inlineWhitespace = false; };
+
+/* Turn on whitespace collecting when scanning inline blocks. This is an
+ * empty rule, used only for the side effect of its action. */
+SetWs: { inlineWhitespace = true; };
+
+InlineExpr:
+    InlineExpr InlineExprItem {
+        /* Add the item to the running list and return that list. */
+        $$ = $1;
+        $$->append( $2 );
+    } |
+    /* Empty */ {
+        /* Base case: the list that collects this expression's items. */
+        $$ = new InlineList;
+    };
+
+InlineExprItem:
+    InlineExprAny {
+        /* Token text is used directly for the text item. */
+        InlineItem *textItem = new InlineItem( @1, $1.data, InlineItem::Text );
+        $$ = textItem;
+    } |
+    InlineExprSymbol {
+        /* Symbol text is a string literal; store a heap copy instead. */
+        char *heapText = strdup( $1.data );
+        $$ = new InlineItem( @1, heapText, InlineItem::Text );
+    } |
+    InlineExprInterpret {
+        /* Already an inline item; forward it. */
+        $$ = $1;
+    };
+
+/* Interpreted keywords that may appear inside an inline expression. Each one
+ * becomes an InlineItem of the matching type, located at the keyword. */
+InlineExprInterpret:
+ KW_PChar {
+ $$ = new InlineItem( @1, InlineItem::PChar );
+ } |
+ KW_Char {
+ $$ = new InlineItem( @1, InlineItem::Char );
+ } |
+ KW_CurState {
+ $$ = new InlineItem( @1, InlineItem::Curs );
+ } |
+ KW_TargState {
+ $$ = new InlineItem( @1, InlineItem::Targs );
+ } |
+ KW_Entry SetNoWs '(' StateRef ')' SetWs {
+ /* The state reference is taken as a copy of id->nameRef. */
+ $$ = new InlineItem( @1, new NameRef(id->nameRef), InlineItem::Entry );
+ };
+
+/* Uninterpreted tokens in an inline expression. No actions: each alternative
+ * is a single token, so bison's default action ($$ = $1) forwards its data. */
+InlineExprAny:
+ IL_WhiteSpace | IL_Comment | IL_Literal | IL_Symbol |
+ TK_UInt | TK_Hex | TK_Word;
+
+/* Anything in a ExecValExpr that is not dynamically allocated. This includes
+ * all special symbols caught in inline code except the semi. The data points
+ * at a string literal and length is that literal's character count. */
+InlineExprSymbol:
+ '(' { $$.data = "("; $$.length = 1; } |
+ ')' { $$.data = ")"; $$.length = 1; } |
+ '*' { $$.data = "*"; $$.length = 1; } |
+ /* "::" is two characters long, same as in InlineBlockSymbol. */
+ TK_NameSep { $$.data = "::"; $$.length = 2; };
+
+/* Parser for regular expression fsms. Any number of expression items which
+ * generally gives a machine one character long or one character long stared. */
+RegularExpr:
+    RegularExpr RegularExprItem {
+        /* Tree-size optimization: when the new item is plain (non-starred)
+         * data and the left side ends in a plain data item as well, fold the
+         * new item's data into the existing item instead of adding a node. */
+        bool rightIsPlainData = $2->type == ReItem::Data && !$2->star;
+        bool canMerge = rightIsPlainData &&
+                $1->type == RegExpr::RecurseItem &&
+                $1->item->type == ReItem::Data && !$1->item->star;
+
+        if ( ! canMerge ) {
+            /* No merge possible: put both sides under a new node. */
+            $$ = new RegExpr( $1, $2 );
+        }
+        else {
+            /* Merge, then discard the now redundant right item. */
+            $1->item->data.append( $2->data );
+            delete $2;
+            $$ = $1;
+        }
+    } |
+    /* Nothing */ {
+        /* Empty expression: there is nothing to merge with yet. */
+        $$ = new RegExpr();
+    };
+
+/* RegularExprItems can be a character spec with an optional staring of the char. */
+RegularExprItem:
+    RegularExprChar RE_Star {
+        /* Starred character spec: same item, flagged as starred. */
+        $$ = $1;
+        $$->star = true;
+    } |
+    RegularExprChar { $$ = $1; };
+
+/* A character spec can be a set of characters inside of square parenthesis,
+ * a dot specifying any character or some explicitly stated character. */
+RegularExprChar:
+ /* [...] : a set of characters and ranges. */
+ RE_SqOpen RegularExprOrData RE_SqClose {
+ $$ = new ReItem( InputLoc(@1), $2, ReItem::OrBlock );
+ } |
+ /* [^...] : the negated form of the set. */
+ RE_SqOpenNeg RegularExprOrData RE_SqClose {
+ $$ = new ReItem( InputLoc(@1), $2, ReItem::NegOrBlock );
+ } |
+ /* The dot. */
+ RE_Dot {
+ $$ = new ReItem( InputLoc(@1), ReItem::Dot );
+ } |
+ /* An explicit character; only the first byte of the token data is used. */
+ RE_Char {
+ $$ = new ReItem( InputLoc(@1), $1.data[0] );
+ };
+
+/* The data inside of a [] expression in a regular expression. Accepts any
+ * number of characters or ranges. */
+RegularExprOrData:
+    RegularExprOrData RegularExprOrChar {
+        /* Tree-size optimization: when the new item is plain data and the
+         * left side ends in a plain data item as well, fold the new item's
+         * data into the existing item instead of adding a node. */
+        bool canMerge = $2->type == ReOrItem::Data &&
+                $1->type == ReOrBlock::RecurseItem &&
+                $1->item->type == ReOrItem::Data;
+
+        if ( ! canMerge ) {
+            /* Can't merge: put the left and right under a new node. */
+            $$ = new ReOrBlock( $1, $2 );
+        }
+        else {
+            /* Merge, then discard the now redundant right item. */
+            $1->item->data.append( $2->data );
+            delete $2;
+            $$ = $1;
+        }
+    } |
+    /* Nothing */ {
+        /* Empty set contents. */
+        $$ = new ReOrBlock();
+    };
+
+
+/* A single character inside of an or expression. Can either be a character
+ * or a set of characters. */
+RegularExprOrChar:
+ /* Single character: the first byte of the token data. */
+ RE_Char {
+ $$ = new ReOrItem( InputLoc(@1), $1.data[0] );
+ } |
+ /* Range "a-b": built from the first byte of each end. The item is located
+  * at the dash. */
+ RE_Char RE_Dash RE_Char {
+ $$ = new ReOrItem( InputLoc(@2), $1.data[0], $3.data[0] );
+ };
+
+/* One end of a range: a single-character literal or an alphabet number. */
+RangeLit:
+ TK_Literal {
+ // Range literals must have only one char. Test the stored length rather
+ // than strlen: the scanner turns the escape \0 into a real NUL byte,
+ // which strlen would report as an empty string.
+ if ( $1.length != 1 ) {
+ // Recover by using the literal anyways.
+ error(@1) << "literal used in range must be of length 1" << endl;
+ }
+ $$ = new Literal( InputLoc(@1), $1, Literal::LitString );
+ } |
+ AlphabetNum {
+ // Create a new literal number.
+ $$ = new Literal( InputLoc(@1), $1, Literal::Number );
+ };
+
+%%
+
+/* Record a machine definition; shared by assignment and instantiation. A name
+ * that is already defined is reported as an error and the new definition is
+ * ignored. */
+void tryMachineDef( const YYLTYPE &loc, char *name, JoinOrLm *joinOrLm, bool isInstance )
+{
+    /* A null result from insert() is treated as "name already present". */
+    GraphDictEl *dictEl = id->pd->graphDict.insert( name );
+    if ( dictEl == 0 ) {
+        /* Recover by ignoring the duplicate. */
+        error(loc) << "fsm \"" << name << "\" previously defined" << endl;
+        return;
+    }
+
+    /* New element in the dict: fill it in. */
+    dictEl->value = new VarDef( name, joinOrLm );
+    dictEl->isInstance = isInstance;
+    dictEl->loc = loc;
+
+    /* Instances are additionally kept on the instance list. */
+    if ( isInstance )
+        id->pd->instanceList.append( dictEl );
+}
+
+/* Parse an included section with a nested yyparse() call. A null sectionName
+ * or inputFile defaults to the current machine name or file name. */
+void doInclude( const InputLoc &loc, char *sectionName, char *inputFile )
+{
+    /* Bail if we hit the max include depth. */
+    if ( includeDepth == INCLUDE_STACK_SIZE ) {
+        error(loc) << "hit maximum include depth of " << INCLUDE_STACK_SIZE << endl;
+        return;
+    }
+
+    /* Third InputData argument: the name of the current machine. */
+    char *includeTo = id->pd->fsmName;
+
+    /* Implement defaults for the input file and section name. */
+    char *fromFile = inputFile != 0 ? inputFile : id->fileName;
+    char *fromSection = sectionName != 0 ? sectionName : id->pd->fsmName;
+
+    /* Run the nested parse on fresh input data, then restore ours. */
+    InputData *outerId = id;
+    id = new InputData( fromFile, fromSection, includeTo );
+    includeDepth += 1;
+    yyparse();
+    includeDepth -= 1;
+    delete id;
+    id = outerId;
+}
+
+/* Write the opening ragel_def tag, but only the first time this is called
+ * while sectionOpened is false. */
+void openSection()
+{
+    if ( sectionOpened )
+        return;
+
+    sectionOpened = true;
+    *outStream << "<ragel_def name=\"" << id->pd->fsmName << "\">\n";
+}
+
+/* Error callback for the parser: report the message at the start of the
+ * token location most recently recorded by the scanner. */
+void yyerror( char *err )
+{
+ /* Bison won't give us the location, but in the last call to the scanner we
+ * saved a pointer to the location variable. Use that instead. */
+ error(::yylloc->first_line, ::yylloc->first_column) << err << endl;
+}
diff --git a/ragel/rlscan.lex b/ragel/rlscan.lex
new file mode 100644
index 0000000..8116c92
--- /dev/null
+++ b/ragel/rlscan.lex
@@ -0,0 +1,1212 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+%{
+
+#define YY_NEVER_INTERACTIVE 1
+//#define WANT_TOKEN_WRITE
+
+#include <iostream>
+#include "ragel.h"
+#include "rlparse.h"
+#include "parsedata.h"
+#include "buffer.h"
+
+using std::cout;
+using std::cerr;
+using std::endl;
+
+Buffer tokbuf;
+int builtinBrace = 0;
+bool inlineWhitespace = true;
+bool handlingInclude = false;
+bool multiline = false;
+
+/* Used for recognising host language code blocks, init with anything not
+ * involved in the host lang test. */
+int previous_tokens[2] = { TK_Section, TK_Section };
+
+/* These keep track of the start of an inline comment or literal string for
+ * reporting unterminated comments or strings. */
+int il_comm_lit_first_line;
+int il_comm_lit_first_column;
+
+/* These keep track of the start of a code block for reporting unterminated
+ * code blocks. */
+int il_code_first_line;
+int il_code_first_column;
+
+/* Include Stack data. */
+YY_BUFFER_STATE buff_stack[INCLUDE_STACK_SIZE];
+bool multiline_stack[INCLUDE_STACK_SIZE];
+int inc_stack_ptr = 0;
+
+YYSTYPE *yylval;
+YYLTYPE *yylloc;
+
+extern InputData *id;
+extern int includeDepth;
+
+void garble();
+
+void extendToken( char *data, int len );
+void extendToken();
+
+int emitToken( int token, char *data, int len );
+int emitNoData( int token );
+void passThrough( char *data );
+bool openMachineSpecBlock();
+void popInclude();
+
+enum InlineBlockType {
+ CurlyDelimited,
+ SemiTerminated
+} inlineBlockType;
+
+/* The parser reaches the scanner through a wrapper, so the generated lex
+ * function is declared under its own name, ragel_lex(). */
+#define YY_DECL int ragel_lex()
+
+%}
+
+/* Outside an fsm machine specification ("outside code"). */
+%x OC_SGL_LIT
+%x OC_DBL_LIT
+%x OC_C_COM
+%x OC_CXX_COM
+
+/* Inside a fsm machine specification. */
+%x RL_INITIAL
+%x RL_SLIT
+%x RL_DLIT
+%x RL_OREXP
+%x RL_REGEXP
+%x RL_REGEXP_OR
+%x RL_SHELL_COM
+%x RL_VERBOSE_EMBED
+%x RL_WRITE
+
+/* Inline code. */
+%x IL_INITIAL
+%x IL_SGL_LIT
+%x IL_DBL_LIT
+%x IL_C_COM
+%x IL_CXX_COM
+
+WSCHAR [\t\n\v\f\r ]
+IDENT [a-zA-Z_][a-zA-Z_0-9]*
+
+%%
+
+ /* Numbers in outer code. */
+<INITIAL>[0-9]+ {
+ garble();
+ passThrough( yytext );
+}
+
+ /* Words in outer code. */
+<INITIAL>{IDENT} {
+ garble();
+ passThrough( yytext );
+}
+
+ /* Begin a c style comment. */
+<INITIAL>"/*" {
+ BEGIN(OC_C_COM);
+ extendToken();
+ passThrough( yytext );
+}
+ /* Data in a C style comment. */
+<OC_C_COM>. extendToken(); passThrough( yytext );
+<OC_C_COM>\n extendToken(); passThrough( yytext );
+
+ /* Terminate a C style comment. */
+<OC_C_COM>"*/" {
+ BEGIN(INITIAL);
+ garble();
+ passThrough( yytext );
+}
+
+ /* Begin a C++ style comment. */
+<INITIAL>"//" {
+ BEGIN(OC_CXX_COM);
+ extendToken();
+ passThrough( yytext );
+}
+ /* Data in a C++ style comment. */
+<OC_CXX_COM>[^\n]+ {
+ extendToken();
+ passThrough( yytext );
+}
+ /* Terminate a C++ style comment. */
+<OC_CXX_COM>\n {
+ BEGIN(INITIAL);
+ garble();
+ passThrough( yytext );
+}
+
+
+ /* Start literals. */
+<INITIAL>\' {
+ BEGIN(OC_SGL_LIT);
+ extendToken();
+ passThrough( yytext );
+}
+<INITIAL>\" {
+ BEGIN(OC_DBL_LIT);
+ extendToken();
+ passThrough( yytext );
+}
+ /* Various escape sequences in literals. We don't need to get them
+ * all here. We just need to pick off the ones that could confuse us
+ * about the literal we are matching. */
+<OC_SGL_LIT,OC_DBL_LIT>\\\' extendToken(); passThrough( yytext );
+<OC_SGL_LIT,OC_DBL_LIT>\\\" extendToken(); passThrough( yytext );
+<OC_SGL_LIT,OC_DBL_LIT>\\\\ extendToken(); passThrough( yytext );
+ /* Characters in literals. */
+<OC_DBL_LIT>[^\"] extendToken(); passThrough( yytext );
+<OC_SGL_LIT>[^\'] extendToken(); passThrough( yytext );
+ /* Terminate a double literal */
+<OC_DBL_LIT>\" {
+ BEGIN(INITIAL);
+ garble();
+ passThrough( yytext );
+}
+ /* Terminate a single literal. */
+<OC_SGL_LIT>\' {
+ BEGIN(INITIAL);
+ garble();
+ passThrough( yytext );
+}
+
+ /* Whitespace. */
+<INITIAL>{WSCHAR}+ {
+ garble();
+ passThrough( yytext );
+}
+
+ /* Section delimiter: opens a single-line machine specification. */
+<INITIAL>"%%" {
+ BEGIN(RL_INITIAL);
+ multiline = false;
+ return emitNoData( TK_Section );
+}
+
+ /* Section delimiter: opens a multi-line machine specification. */
+<INITIAL>"%%{" {
+ BEGIN(RL_INITIAL);
+ multiline = true;
+ return emitNoData( TK_Section );
+}
+
+<INITIAL>"{" {
+ garble();
+ passThrough( yytext );
+}
+
+<INITIAL>"}" {
+ garble();
+ passThrough( yytext );
+}
+
+<INITIAL>";" {
+ garble();
+ passThrough( yytext );
+}
+
+ /* Any other characters. */
+<INITIAL>. {
+ garble();
+ passThrough( yytext );
+}
+
+ /* Numbers. */
+<RL_INITIAL,IL_INITIAL>[0-9][0-9]* {
+ return emitToken( TK_UInt, yytext, yyleng );
+}
+<RL_INITIAL,IL_INITIAL>0x[0-9a-fA-F][0-9a-fA-F]* {
+ return emitToken( TK_Hex, yytext, yyleng );
+}
+
+ /* Keywords in RL and IL. */
+<RL_INITIAL>variable\ [a-zA-Z_]+ {
+ BEGIN(IL_INITIAL);
+ inlineBlockType = SemiTerminated;
+ return emitToken( KW_Variable, yytext+9, yyleng-9 );
+}
+<RL_INITIAL>access {
+ BEGIN(IL_INITIAL);
+ inlineBlockType = SemiTerminated;
+ return emitNoData( KW_Access );
+}
+<RL_INITIAL>action {
+ return emitNoData( KW_Action );
+}
+<RL_INITIAL>alphtype {
+ BEGIN(IL_INITIAL);
+ inlineWhitespace = false;
+ inlineBlockType = SemiTerminated;
+ return emitNoData( KW_AlphType );
+}
+<RL_INITIAL>getkey {
+ BEGIN(IL_INITIAL);
+ inlineBlockType = SemiTerminated;
+ return emitNoData( KW_GetKey );
+}
+<RL_INITIAL>when {
+ return emitNoData( KW_When );
+}
+<RL_INITIAL>eof {
+ return emitNoData( KW_Eof );
+}
+<RL_INITIAL>err {
+ return emitNoData( KW_Err );
+}
+<RL_INITIAL>lerr {
+ return emitNoData( KW_Lerr );
+}
+<RL_INITIAL>to {
+ return emitNoData( KW_To );
+}
+<RL_INITIAL>from {
+ return emitNoData( KW_From );
+}
+
+
+ /*
+<RL_INITIAL>range {
+ return emitNoData( KW_Range );
+}*/
+
+<RL_INITIAL>write {
+ BEGIN(RL_WRITE);
+ return emitNoData( KW_Write );
+}
+<RL_INITIAL>machine {
+ return emitNoData( KW_Machine );
+}
+<RL_INITIAL>include {
+ /* Include statements are processed by both the scanner and the
+ * parser. The scanner opens the include file and switches to it and the
+ * parser invokes a new parser for handling the tokens. We use
+ * handlingInclude to indicate that the scanner is processing an include
+ * directive. Ends at ; */
+ handlingInclude = true;
+ return emitNoData( KW_Include );
+}
+
+<RL_WRITE>{WSCHAR}+ garble();
+<RL_WRITE>; {
+ BEGIN(RL_INITIAL);
+ return emitNoData( ';' );
+}
+
+ /* These must be synced in rlparse.y */
+<IL_INITIAL>fpc {
+ return emitNoData( KW_PChar );
+}
+<IL_INITIAL>fc {
+ return emitNoData( KW_Char );
+}
+<IL_INITIAL>fhold {
+ return emitNoData( KW_Hold );
+}
+<IL_INITIAL>fgoto {
+ return emitNoData( KW_Goto );
+}
+<IL_INITIAL>fcall {
+ return emitNoData( KW_Call );
+}
+<IL_INITIAL>fret {
+ return emitNoData( KW_Ret );
+}
+<IL_INITIAL>fcurs {
+ return emitNoData( KW_CurState );
+}
+<IL_INITIAL>ftargs {
+ return emitNoData( KW_TargState );
+}
+<IL_INITIAL>fentry {
+ return emitNoData( KW_Entry );
+}
+<IL_INITIAL>fnext {
+ return emitNoData( KW_Next );
+}
+<IL_INITIAL>fexec {
+ return emitNoData( KW_Exec );
+}
+<IL_INITIAL>fbreak {
+ return emitNoData( KW_Break );
+}
+
+ /* Words. */
+<RL_INITIAL,IL_INITIAL,RL_WRITE>{IDENT} {
+ return emitToken( TK_Word, yytext, yyleng );
+}
+
+ /* Begin a shell style comment. */
+<RL_INITIAL># {
+ BEGIN(RL_SHELL_COM);
+ extendToken();
+}
+ /* Data in a shell style comment. */
+<RL_SHELL_COM>[^\n]+ {
+ extendToken();
+}
+ /* Terminate a shell style comment. */
+<RL_SHELL_COM>\n {
+ BEGIN(RL_INITIAL);
+ garble();
+}
+
+ /*
+ * Start single and double literals.
+ */
+<RL_INITIAL>' {
+ BEGIN(RL_SLIT);
+ extendToken();
+}
+<RL_INITIAL>\" {
+ BEGIN(RL_DLIT);
+ extendToken();
+}
+
+ /* Escape sequences in single and double literals. */
+<RL_SLIT,RL_DLIT>\\0 extendToken( "\0", 1 );
+<RL_SLIT,RL_DLIT>\\a extendToken( "\a", 1 );
+<RL_SLIT,RL_DLIT>\\b extendToken( "\b", 1 );
+<RL_SLIT,RL_DLIT>\\t extendToken( "\t", 1 );
+<RL_SLIT,RL_DLIT>\\n extendToken( "\n", 1 );
+<RL_SLIT,RL_DLIT>\\v extendToken( "\v", 1 );
+<RL_SLIT,RL_DLIT>\\f extendToken( "\f", 1 );
+<RL_SLIT,RL_DLIT>\\r extendToken( "\r", 1 );
+<RL_SLIT,RL_DLIT>\\\n extendToken();
+<RL_SLIT,RL_DLIT>\\. extendToken( yytext+1, 1 );
+
+ /* Characters in literals. */
+<RL_SLIT>[^'] extendToken( yytext, 1 );
+<RL_DLIT>[^"] extendToken( yytext, 1 );
+
+ /* Terminate a single literal. */
+<RL_SLIT>'[i]* {
+ BEGIN(RL_INITIAL);
+ return emitToken( yytext[1] == 'i' ? TK_CiLiteral : TK_Literal, 0, 0 );
+}
+ /* Terminate a double literal */
+<RL_DLIT>\"[i]* {
+ BEGIN(RL_INITIAL);
+ return emitToken( yytext[1] == 'i' ? TK_CiLiteral : TK_Literal, 0, 0 );
+}
+
+ /*
+ * Start an OR expression.
+ */
+<RL_INITIAL>"[" {
+ BEGIN(RL_OREXP);
+ return emitNoData( RE_SqOpen );
+}
+
+<RL_INITIAL>"\[^" {
+ BEGIN(RL_OREXP);
+ return emitNoData( RE_SqOpenNeg );
+}
+
+ /* Escape sequences in OR expressions. */
+<RL_OREXP>\\0 { return emitToken( RE_Char, "\0", 1 ); }
+<RL_OREXP>\\a { return emitToken( RE_Char, "\a", 1 ); }
+<RL_OREXP>\\b { return emitToken( RE_Char, "\b", 1 ); }
+<RL_OREXP>\\t { return emitToken( RE_Char, "\t", 1 ); }
+<RL_OREXP>\\n { return emitToken( RE_Char, "\n", 1 ); }
+<RL_OREXP>\\v { return emitToken( RE_Char, "\v", 1 ); }
+<RL_OREXP>\\f { return emitToken( RE_Char, "\f", 1 ); }
+<RL_OREXP>\\r { return emitToken( RE_Char, "\r", 1 ); }
+<RL_OREXP>\\\n { garble(); }
+<RL_OREXP>\\. { return emitToken( RE_Char, yytext+1, 1 ); }
+
+ /* Range dash in an OR expression. */
+<RL_OREXP>- {
+ return emitNoData( RE_Dash );
+}
+
+ /* Characters in an OR expression. */
+<RL_OREXP>[^\]] {
+ return emitToken( RE_Char, yytext, 1 );
+}
+
+ /* Terminate an OR expression. */
+<RL_OREXP>\] {
+ BEGIN(RL_INITIAL);
+ return emitNoData( RE_SqClose );
+}
+
+ /*
+ * Start a regular expression.
+ */
+<RL_INITIAL>\/ {
+ BEGIN(RL_REGEXP);
+ return emitNoData( RE_Slash );
+}
+
+ /* Escape sequences in regular expressions. */
+<RL_REGEXP,RL_REGEXP_OR>\\0 {
+ return emitToken( RE_Char, "\0", 1 );
+}
+<RL_REGEXP,RL_REGEXP_OR>\\a {
+ return emitToken( RE_Char, "\a", 1 );
+}
+<RL_REGEXP,RL_REGEXP_OR>\\b {
+ return emitToken( RE_Char, "\b", 1 );
+}
+<RL_REGEXP,RL_REGEXP_OR>\\t {
+ return emitToken( RE_Char, "\t", 1 );
+}
+<RL_REGEXP,RL_REGEXP_OR>\\n {
+ return emitToken( RE_Char, "\n", 1 );
+}
+<RL_REGEXP,RL_REGEXP_OR>\\v {
+ return emitToken( RE_Char, "\v", 1 );
+}
+<RL_REGEXP,RL_REGEXP_OR>\\f {
+ return emitToken( RE_Char, "\f", 1 );
+}
+<RL_REGEXP,RL_REGEXP_OR>\\r {
+ return emitToken( RE_Char, "\r", 1 );
+}
+<RL_REGEXP,RL_REGEXP_OR>\\\n {
+ garble();
+}
+<RL_REGEXP,RL_REGEXP_OR>\\. {
+ return emitToken( RE_Char, yytext+1, 1 );
+}
+
+ /* Special characters in a regular expression. */
+<RL_REGEXP>\. {
+ return emitNoData( RE_Dot );
+}
+<RL_REGEXP>\* {
+ return emitNoData( RE_Star );
+}
+<RL_REGEXP>"\[^" {
+ BEGIN(RL_REGEXP_OR);
+ return emitNoData( RE_SqOpenNeg );
+}
+<RL_REGEXP>"\[" {
+ BEGIN(RL_REGEXP_OR);
+ return emitNoData( RE_SqOpen );
+}
+
+ /* Range dash in a regular expression or set. */
+<RL_REGEXP_OR>- {
+ return emitNoData( RE_Dash );
+}
+
+ /* Terminate an or set or a regular expression. */
+<RL_REGEXP_OR>\] {
+ BEGIN(RL_REGEXP);
+ return emitNoData( RE_SqClose );
+}
+
+ /* Characters in a regular expression. */
+<RL_REGEXP,RL_REGEXP_OR>[^/] {
+ return emitToken( RE_Char, yytext, 1 );
+}
+
+ /* Terminate a regular expression */
+<RL_REGEXP,RL_REGEXP_OR>\/[i]* {
+ BEGIN(RL_INITIAL);
+ return emitToken( RE_Slash, yytext, yyleng );
+}
+
+ /* Builtin code move to Builtin initial. */
+<RL_INITIAL>"{" {
+ if ( openMachineSpecBlock() ) {
+ /* Plain bracket. */
+ return emitNoData( *yytext );
+ }
+ else {
+ /* Start an inline code block. Keep track of where it started in case
+ * it terminates prematurely. Return the open bracket. */
+ BEGIN(IL_INITIAL);
+ inlineBlockType = CurlyDelimited;
+ il_code_first_line = id->last_line;
+ il_code_first_column = id->last_column+1;
+ builtinBrace++;
+ return emitNoData( *yytext );
+ }
+}
+
+<RL_INITIAL>\.\. {
+ return emitNoData( TK_DotDot );
+}
+
+<RL_INITIAL>:> {
+ return emitNoData( TK_ColonGt );
+}
+
+<RL_INITIAL>:>> {
+ return emitNoData( TK_ColonGtGt );
+}
+
+<RL_INITIAL><: {
+ return emitNoData( TK_LtColon );
+}
+
+<RL_INITIAL>-- {
+ return emitNoData( TK_DashDash );
+}
+
+ /* The instantiation operator. */
+<RL_INITIAL>:= {
+ return emitNoData( TK_ColonEquals );
+}
+
+ /* Error actions. */
+<RL_INITIAL>\>\! {
+ return emitNoData( TK_StartGblError );
+}
+<RL_INITIAL>\$\! {
+ return emitNoData( TK_AllGblError );
+}
+<RL_INITIAL>%\! {
+ return emitNoData( TK_FinalGblError );
+}
+<RL_INITIAL><\! {
+ return emitNoData( TK_NotStartGblError );
+}
+<RL_INITIAL>@\! {
+ return emitNoData( TK_NotFinalGblError );
+}
+<RL_INITIAL><>\! {
+ return emitNoData( TK_MiddleGblError );
+}
+
+ /* Local error actions. */
+<RL_INITIAL>\>\^ {
+ return emitNoData( TK_StartLocalError );
+}
+<RL_INITIAL>\$\^ {
+ return emitNoData( TK_AllLocalError );
+}
+<RL_INITIAL>%\^ {
+ return emitNoData( TK_FinalLocalError );
+}
+<RL_INITIAL><\^ {
+ return emitNoData( TK_NotStartLocalError );
+}
+<RL_INITIAL>@\^ {
+ return emitNoData( TK_NotFinalLocalError );
+}
+<RL_INITIAL><>\^ {
+ return emitNoData( TK_MiddleLocalError );
+}
+
+ /* EOF Actions. */
+<RL_INITIAL>\>\/ {
+ return emitNoData( TK_StartEOF );
+}
+<RL_INITIAL>\$\/ {
+ return emitNoData( TK_AllEOF );
+}
+<RL_INITIAL>%\/ {
+ return emitNoData( TK_FinalEOF );
+}
+<RL_INITIAL><\/ {
+ return emitNoData( TK_NotStartEOF );
+}
+<RL_INITIAL>@\/ {
+ return emitNoData( TK_NotFinalEOF );
+}
+<RL_INITIAL><>\/ {
+ return emitNoData( TK_MiddleEOF );
+}
+
+ /* To State Actions. */
+<RL_INITIAL>\>~ {
+ return emitNoData( TK_StartToState );
+}
+<RL_INITIAL>\$~ {
+ return emitNoData( TK_AllToState );
+}
+<RL_INITIAL>%~ {
+ return emitNoData( TK_FinalToState );
+}
+<RL_INITIAL><~ {
+ return emitNoData( TK_NotStartToState );
+}
+<RL_INITIAL>@~ {
+ return emitNoData( TK_NotFinalToState );
+}
+<RL_INITIAL><>~ {
+ return emitNoData( TK_MiddleToState );
+}
+
+ /* From State Actions. */
+<RL_INITIAL>\>\* {
+ return emitNoData( TK_StartFromState );
+}
+<RL_INITIAL>\$\* {
+ return emitNoData( TK_AllFromState );
+}
+<RL_INITIAL>%\* {
+ return emitNoData( TK_FinalFromState );
+}
+<RL_INITIAL><\* {
+ return emitNoData( TK_NotStartFromState );
+}
+<RL_INITIAL>@\* {
+ return emitNoData( TK_NotFinalFromState );
+}
+<RL_INITIAL><>\* {
+ return emitNoData( TK_MiddleFromState );
+}
+
+<RL_INITIAL><> {
+ return emitNoData( TK_Middle );
+}
+
+<RL_INITIAL>\>\? {
+ return emitNoData( TK_StartCond );
+}
+<RL_INITIAL>\$\? {
+ return emitNoData( TK_AllCond );
+}
+<RL_INITIAL>%\? {
+ return emitNoData( TK_LeavingCond );
+}
+
+ /* The Arrow operator. */
+<RL_INITIAL>-> {
+ return emitNoData( TK_Arrow );
+}
+
+ /* The double arrow operator. */
+<RL_INITIAL>=> {
+ return emitNoData( TK_DoubleArrow );
+}
+
+ /* Double star (longest match kleene star). */
+<RL_INITIAL>\*\* {
+ return emitNoData( TK_StarStar );
+}
+
+ /* Name separator. */
+<RL_INITIAL>:: {
+ return emitNoData( TK_NameSep );
+}
+
+ /* Opening of longest match. */
+<RL_INITIAL>\|\* {
+ return emitNoData( TK_BarStar );
+}
+
+ /* Catch the repetition operator now to free up the parser. Once caught,
+ * Send only the opening brace and rescan the rest so it can be broken
+ * up for the parser. */
+<RL_INITIAL>\{([0-9]+(,[0-9]*)?|,[0-9]+)\} {
+ yyless(1);
+ return emitNoData( TK_RepOpOpen );
+}
+
+ /* Section delimiter: closes the machine specification and returns to outer code. */
+<RL_INITIAL>"}%%" {
+ BEGIN(INITIAL);
+ return emitNoData( TK_Section );
+}
+
+ /* Whitespace. */
+<RL_INITIAL>[\t\v\f\r ] garble();
+<RL_INITIAL>\n {
+ if ( multiline )
+ garble();
+ else {
+ BEGIN(INITIAL);
+ return emitNoData( TK_SectionNL );
+ }
+}
+
+ /* Any other characters. */
+<RL_INITIAL>. {
+ return emitNoData( *yytext );
+}
+
+ /* End of input in a literal is an error. */
+<RL_SLIT,RL_DLIT><<EOF>> {
+ error(id->first_line, id->first_column) << "unterminated literal" << endl;
+ exit(1);
+}
+
+ /* End of input in a comment is an error. */
+<RL_SHELL_COM><<EOF>> {
+ error(id->first_line, id->first_column) << "unterminated comment" << endl;
+ exit(1);
+}
+
+ /* Begin a C style comment. */
+<IL_INITIAL>"/*" {
+ BEGIN(IL_C_COM);
+ il_comm_lit_first_line = id->last_line;
+ il_comm_lit_first_column = id->last_column+1;
+ extendToken( yytext, yyleng );
+}
+ /* Data in a C style comment. */
+<IL_C_COM>\n extendToken( yytext, 1 );
+<IL_C_COM>. extendToken( yytext, 1 );
+
+ /* Terminate a C style comment. */
+<IL_C_COM>"*/" {
+ BEGIN(IL_INITIAL);
+ return emitToken( IL_Comment, yytext, 2 );
+}
+
+ /* Begin a C++ style comment. */
+<IL_INITIAL>"//" {
+ BEGIN(IL_CXX_COM);
+ il_comm_lit_first_line = id->last_line;
+ il_comm_lit_first_column = id->last_column+1;
+ extendToken( yytext, yyleng );
+}
+ /* Data in a C++ style comment. */
+<IL_CXX_COM>[^\n]+ {
+ extendToken( yytext, yyleng );
+}
+ /* Terminate a C++ style comment. */
+<IL_CXX_COM>\n {
+ BEGIN(IL_INITIAL);
+ return emitToken( IL_Comment, yytext, 1 );
+}
+
+
+ /* Start literals. */
+<IL_INITIAL>' {
+ BEGIN(IL_SGL_LIT);
+ il_comm_lit_first_line = id->last_line;
+ il_comm_lit_first_column = id->last_column+1;
+ extendToken( yytext, 1 );
+}
+<IL_INITIAL>\" {
+ BEGIN(IL_DBL_LIT);
+ il_comm_lit_first_line = id->last_line;
+ il_comm_lit_first_column = id->last_column+1;
+ extendToken( yytext, 1 );
+}
+ /* Various escape sequences in literals. We don't need to get them
+ * all here. We just need to pick off the ones that could confuse us
+ * about the literal we are matching */
+<IL_SGL_LIT,IL_DBL_LIT>\\' extendToken( yytext, yyleng );
+<IL_SGL_LIT,IL_DBL_LIT>\\\" extendToken( yytext, yyleng );
+<IL_SGL_LIT,IL_DBL_LIT>\\\\ extendToken( yytext, yyleng );
+ /* Characters in literals. */
+<IL_DBL_LIT>[^\"] extendToken( yytext, 1 );
+<IL_SGL_LIT>[^'] extendToken( yytext, 1 );
+
+ /* Terminate a double literal */
+<IL_DBL_LIT>\" {
+ BEGIN(IL_INITIAL);
+ return emitToken( IL_Literal, yytext, 1 );
+}
+ /* Terminate a single literal. */
+<IL_SGL_LIT>' {
+ BEGIN(IL_INITIAL);
+ return emitToken( IL_Literal, yytext, 1 );
+}
+
+ /* Open Brace, increment count of open braces. */
+<IL_INITIAL>"{" {
+ builtinBrace++;
+ return emitToken( IL_Symbol, yytext, 1 );
+}
+
+ /* Close brace, decrement count of open braces. */
+<IL_INITIAL>"}" {
+ builtinBrace--;
+ if ( inlineBlockType == CurlyDelimited && builtinBrace == 0 ) {
+ /* Inline code block ends. */
+ BEGIN(RL_INITIAL);
+ inlineWhitespace = true;
+ return emitNoData( *yytext );
+ }
+ else {
+ /* Either a semi terminated inline block or only the closing brace of
+ * some inner scope, not the block's closing brace. */
+ return emitToken( IL_Symbol, yytext, 1 );
+ }
+}
+
+ /* May need to terminate the inline block. */
+<IL_INITIAL>; {
+ if ( inlineBlockType == SemiTerminated ) {
+ /* Inline code block ends. */
+ BEGIN(RL_INITIAL);
+ inlineWhitespace = true;
+ return emitNoData( TK_Semi );
+ }
+ else {
+ /* Not ending. The semi is sent as a token, not a generic symbol. */
+ return emitNoData( *yytext );
+ }
+}
+
+ /* Catch some symbols so they can be
+ * sent as tokens instead as generic symbols. */
+<IL_INITIAL>[*()] {
+ return emitNoData( *yytext );
+}
+<IL_INITIAL>:: {
+ return emitNoData( TK_NameSep );
+}
+
+ /* Whitespace. */
+<IL_INITIAL>{WSCHAR}+ {
+ /* With whitespace collecting on, the run is passed to the parser. */
+ if ( inlineWhitespace )
+ return emitToken( IL_WhiteSpace, yytext, yyleng );
+
+ /* Whitespace collecting is off: drop the text, but still run it through
+  * garble() so the newlines and columns in it keep being counted. */
+ garble();
+}
+
+ /* Any other characters. */
+<IL_INITIAL>. {
+ return emitToken( IL_Symbol, yytext, 1 );
+}
+
+<INITIAL><<EOF>> {
+ /* If we are not at the bottom of the include stack, then pop the current
+ * file that we are scanning. Since we are always returning 0 to the parser
+ * it will exit and return to the parser that called it. */
+ if ( inc_stack_ptr > 0 )
+ popInclude();
+ return 0;
+}
+
+ /* End of input in a literal is an error. */
+<IL_SGL_LIT,IL_DBL_LIT><<EOF>> {
+ error(il_comm_lit_first_line, il_comm_lit_first_column) <<
+ "unterminated literal" << endl;
+ exit(1);
+}
+
+ /* End of input in a comment is an error. */
+<IL_C_COM,IL_CXX_COM><<EOF>> {
+ error(il_comm_lit_first_line, il_comm_lit_first_column) <<
+ "unterminated comment" << endl;
+ exit(1);
+}
+
+ /* End of input in a code block is an error. */
+<IL_INITIAL><<EOF>> {
+ error(il_code_first_line, il_code_first_column) <<
+ "unterminated code block" << endl;
+ exit(1);
+}
+
+%%
+
+/* Debug aid: write out a token's id, location and data, escaping newlines and
+ * tabs. Only compiled when WANT_TOKEN_WRITE is defined. */
+#ifdef WANT_TOKEN_WRITE
+void writeToken( int token, char *data )
+{
+    cout << "token id " << token << " at " << id->fileName << ":" <<
+        yylloc->first_line << ":" << yylloc->first_column << "-" <<
+        yylloc->last_line << ":" << yylloc->last_column << " ";
+
+    /* A null data pointer prints nothing; otherwise stop at the first NUL. */
+    for ( char *ch = data; ch != 0 && *ch != 0; ch++ ) {
+        if ( *ch == '\n' )
+            cout << "\\n";
+        else if ( *ch == '\t' )
+            cout << "\\t";
+        else
+            cout << *ch;
+    }
+    cout << endl;
+}
+#endif
+
+/* Calculate line info from yytext. Called on every pattern match. Advances
+ * id->last_line / id->last_column over the matched text and copies the
+ * resulting token span into *yylloc. */
+void updateLineInfo()
+{
+    /* Every pattern matches at least one character. */
+    assert( yytext[0] != 0 );
+
+    /* Walk over everything except the final character. */
+    char *last = yytext;
+    while ( last[1] != 0 ) {
+        if ( *last == '\n' ) {
+            id->last_line += 1;
+            id->last_column = 0;
+        }
+        else
+            id->last_column += 1;
+        last += 1;
+    }
+
+    /* The final character always counts as a column, even when it is a
+     * newline: a newline ending a token sits at the end of its line. */
+    id->last_column += 1;
+
+    /* The caller may be about to emit a token; publish the span for the
+     * parser. */
+    yylloc->first_line = id->first_line;
+    yylloc->first_column = id->first_column;
+    yylloc->last_line = id->last_line;
+    yylloc->last_column = id->last_column;
+
+    /* If the final character was a newline after all, wrap to the next line
+     * now that the span has been recorded. */
+    if ( *last == '\n' ) {
+        id->last_line += 1;
+        id->last_column = 0;
+    }
+}
+
+/* Consume the current match without making it part of any token. */
+void garble()
+{
+    /* Account for the matched text in the line/column counters. */
+    updateLineInfo();
+
+    /* Nothing is pending, so the next token begins right after this match. */
+    id->first_column = id->last_column + 1;
+    id->first_line = id->last_line;
+}
+
+/* Add data to the token being accumulated; more token data is expected. A
+ * null pointer or non-positive length adds nothing, but the line info is
+ * updated from yytext either way. */
+void extendToken( char *data, int len )
+{
+    bool haveData = data != 0 && len > 0;
+    if ( haveData )
+        tokbuf.append( data, len );
+
+    updateLineInfo();
+}
+
+/* Extend the current token without adding data; more data is expected. Used
+ * for matched text that belongs to the token's span but not to its content. */
+void extendToken()
+{
+ /* Update line information from yytext. */
+ updateLineInfo();
+}
+
+
+/* Possibly process include data. Called with each emitted token. While
+ * handlingInclude is set it remembers the file name literal of the include
+ * statement, and on the closing ';' switches the scanner to that file. */
+void processInclude( int token )
+{
+ /* File name seen so far in the current include statement. Static because
+  * the statement's tokens arrive over several calls. */
+ static char *incFileName = 0;
+
+ if ( handlingInclude ) {
+ if ( token == KW_Include )
+ incFileName = 0;
+ else if ( token == TK_Literal )
+ incFileName = yylval->data.data;
+ else if ( token == ';' ) {
+ /* Terminate the include statement. Start reading from included file. */
+ handlingInclude = false;
+
+ if ( id->active && includeDepth < INCLUDE_STACK_SIZE ) {
+ /* If there is no input file, default to the current file. */
+ if ( incFileName == 0 )
+ incFileName = id->fileName;
+
+ /* Make the new buffer and switch to it. */
+ FILE *incFile = fopen( incFileName, "rt" );
+ if ( incFile != 0 ) {
+ /* Save the current buffer and multiline flag so popInclude()
+  * can restore them, then scan the new file from INITIAL. */
+ buff_stack[inc_stack_ptr] = YY_CURRENT_BUFFER;
+ multiline_stack[inc_stack_ptr] = multiline;
+ inc_stack_ptr += 1;
+ yy_switch_to_buffer( yy_create_buffer( incFile, YY_BUF_SIZE ) );
+ BEGIN(INITIAL);
+ }
+ else {
+ error(*yylloc) << "could not locate include file \"" << incFileName
+ << "\"" << endl;
+ }
+ }
+ }
+ }
+}
+
+/* Leave an included file: discard its buffer and resume the buffer and
+ * multiline state that were saved when the include was entered. */
+void popInclude()
+{
+    /* Release the buffer of the file just finished. */
+    yy_delete_buffer( YY_CURRENT_BUFFER );
+
+    /* Step back to the saved entry and restore from it. */
+    --inc_stack_ptr;
+    yy_switch_to_buffer( buff_stack[inc_stack_ptr] );
+    multiline = multiline_stack[inc_stack_ptr];
+
+    /* Includes get called only from RL_INITIAL. */
+    BEGIN(RL_INITIAL);
+}
+
+
+/* Shared tail of emitToken and emitNoData. Advances the line information,
+ * optionally logs the token, clears the token buffer, positions the start of
+ * the next token, records the token history and gives include processing a
+ * look at the token. text is the token's null-terminated text, or 0 when the
+ * token carries no data. Returns token. */
+static int finishToken( int token, char *text )
+{
+    /* Only read when WANT_TOKEN_WRITE is defined. */
+    (void) text;
+
+    /* Update line information from yytext. */
+    updateLineInfo();
+
+    /* Write token info. */
+#ifdef WANT_TOKEN_WRITE
+    writeToken( token, text );
+#endif
+
+    /* Clear out the buffer. */
+    tokbuf.clear();
+
+    /* The next token starts ahead of the last token. */
+    id->first_line = id->last_line;
+    id->first_column = id->last_column + 1;
+
+    /* Maintain a record of two tokens back. */
+    previous_tokens[1] = previous_tokens[0];
+    previous_tokens[0] = token;
+
+    /* Possibly process the include statement. */
+    processInclude( token );
+
+    return token;
+}
+
+/* Append data to the end of the token buffer and emit the token to the
+ * parser. The parser receives, through yylval, a freshly allocated
+ * null-terminated copy of the accumulated text together with its length.
+ * A null data pointer or a non-positive len appends nothing. */
+int emitToken( int token, char *data, int len )
+{
+    /* Append any new data. */
+    if ( data != 0 && len > 0 )
+        tokbuf.append( data, len );
+
+    /* Duplicate the buffer. The buffer may never have been allocated, so
+     * only copy when there is something to copy. */
+    char *copy = new char[tokbuf.length+1];
+    if ( tokbuf.length > 0 )
+        memcpy( copy, tokbuf.data, tokbuf.length );
+    copy[tokbuf.length] = 0;
+
+    yylval->data.data = copy;
+    yylval->data.length = tokbuf.length;
+
+    /* Log the terminated copy rather than tokbuf.data, which is not known
+     * to be null-terminated and may be null. */
+    return finishToken( token, copy );
+}
+
+/* Emit a token with no data to the parser. Any text accumulated in the
+ * token buffer is discarded. */
+int emitNoData( int token )
+{
+    /* Return null to the parser. */
+    yylval->data.data = 0;
+    yylval->data.length = 0;
+
+    return finishToken( token, 0 );
+}
+
+/* Pass text from the outer (host) code through to the output, escaped by
+ * xmlEscapeHost. */
+void passThrough( char *data )
+{
+    /* Nothing is written once an error has been counted, or while inside
+     * an included file (only the source file listed on the command line,
+     * at the bottom of the include stack, is passed through). */
+    if ( gblErrorCount != 0 || inc_stack_ptr != 0 )
+        return;
+
+    /* Nothing is written when machineSpec or machineName is set either. */
+    if ( machineSpec != 0 || machineName != 0 )
+        return;
+
+    xmlEscapeHost( *outStream, data );
+}
+
+/* Construct an empty buffer: no storage, zero length, nothing allocated. */
+Buffer::Buffer()
+:
+ data(0),
+ length(0),
+ allocated(0)
+{
+}
+
+/* Release the buffer's storage on destruction by calling empty(). */
+Buffer::~Buffer()
+{
+ empty();
+}
+
+/* Release the buffer's storage and reset it to the empty state. A buffer
+ * that holds no storage is left untouched. */
+void Buffer::empty()
+{
+    if ( data == 0 )
+        return;
+
+    free( data );
+    data = 0;
+    allocated = 0;
+    length = 0;
+}
+
+/* Resize the buffer's storage to len bytes and record the new capacity. */
+void Buffer::upAllocate( int len )
+{
+    /* realloc() on a null pointer behaves as malloc(), so the first
+     * allocation and later growth share one call. */
+    data = (char*) realloc( data, len );
+    allocated = len;
+}
+
+int yywrap()
+{
+ /* Once processing of the input is done, signal that there is no more. */
+ return 1;
+}
+
+/* Here simply to suppress the unused yyunput warning. */
+void thisFuncIsNeverCalled()
+{
+ yyunput(0, 0);
+}
+
+/* Put the scanner back into the outside code start state (INITIAL). */
+void beginOutsideCode()
+{
+ BEGIN(INITIAL);
+}
+
+/* Decide from the two most recently emitted tokens whether a machine
+ * specification block is being opened: either the last token was TK_Section,
+ * or it was a TK_Word directly preceded by TK_Section. */
+bool openMachineSpecBlock()
+{
+    return ( previous_tokens[1] == TK_Section &&
+                previous_tokens[0] == TK_Word ) ||
+            previous_tokens[0] == TK_Section;
+}
+
+/* Wrapper for the lexer which stores the parser's semantic value and location
+ * pointers into globals before calling ragel_lex(). The parser is reentrant,
+ * however the scanner does not need to be, so globals work fine. This saves us
+ * passing them around to all the helper functions. */
+int yylex( YYSTYPE *yylval, YYLTYPE *yylloc )
+{
+ ::yylval = yylval;
+ ::yylloc = yylloc;
+ return ragel_lex();
+}
+
diff --git a/ragel/rlscan.rl b/ragel/rlscan.rl
new file mode 100644
index 0000000..b1396f1
--- /dev/null
+++ b/ragel/rlscan.rl
@@ -0,0 +1,907 @@
+/*
+ * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <iostream>
+#include <fstream>
+#include <string.h>
+
+#include "ragel.h"
+#include "rlparse.h"
+#include "parsedata.h"
+#include "avltree.h"
+#include "vector.h"
+
+
+using std::ifstream;
+using std::istream;
+using std::ostream;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+/* This is used for tracking the current stack of include file/machine pairs. It
+ * is used to detect a recursive include structure. */
+struct IncludeStackItem
+{
+ IncludeStackItem( char *fileName, char *sectionName )
+ : fileName(fileName), sectionName(sectionName) {}
+
+ char *fileName;
+ char *sectionName;
+};
+
+typedef Vector<IncludeStackItem> IncludeStack;
+IncludeStack includeStack;
+
+enum InlineBlockType
+{
+ CurlyDelimited, /* Inline block opened by '{'; ends at the matching '}'. */
+ SemiTerminated /* Inline block after getkey/access/variable; ends at ';'. */
+};
+
+struct Scanner
+{ /* Scanner for one input file. It tokenizes the input (do_scan) and feeds
+ * the tokens of each ragel section through the section_parse machine
+ * (token), which handles machine/include/write statements itself and
+ * forwards everything else to the active Parser. */
+ Scanner( char *fileName, istream &input,
+ Parser *inclToParser, char *inclSectionTarg,
+ int include_depth )
+ :
+ fileName(fileName), input(input),
+ inclToParser(inclToParser),
+ inclSectionTarg(inclSectionTarg),
+ include_depth(include_depth),
+ line(1), column(1), lastnl(0),
+ parser(0), active(false),
+ parserExistsError(false), ragelDefOpen(false),
+ whitespaceOn(true)
+ {}
+
+ bool recursiveInclude( IncludeStack &includeStack,
+ char *inclFileName, char *inclSectionName );
+
+ char *prepareFileName( char *fileName, int len )
+ { /* Run the include file name literal through Token::prepareLitString
+ * and return the resulting data (presumably the unquoted string --
+ * TODO confirm against Token). The case-insensitive flag is ignored. */
+ bool caseInsensitive;
+ Token tokenFnStr, tokenRes;
+ tokenFnStr.data = fileName;
+ tokenFnStr.length = len;
+ tokenFnStr.prepareLitString( tokenRes, caseInsensitive );
+ return tokenRes.data;
+ }
+
+ void init();
+ void token( int type, char *start, char *end );
+ void token( int type, char *string );
+ void token( int type );
+ void updateCol();
+ void startSection();
+ void endSection();
+ void openRagelDef();
+ void do_scan();
+ bool parserExists();
+ ostream &error();
+
+ char *fileName; /* Name of the input; used in error messages and locations. */
+ istream &input; /* Stream read by do_scan. */
+ Parser *inclToParser; /* Parser that receives the included section's tokens. */
+ char *inclSectionTarg; /* Section being included, or 0 when not scanning an include. */
+ int include_depth; /* 0 for the top-level file, one more per nested include. */
+
+ int cs; /* Current state of the section_parse machine. */
+ int line; /* Current line, starting at 1. */
+ char *word, *lit; /* Data of the TK_Word / TK_Literal stored by section_parse. */
+ int word_len, lit_len;
+ InputLoc sectionLoc; /* Where the current section starts (set by startSection). */
+ char *tokstart, *tokend; /* Bounds of the token being scanned by do_scan. */
+ int column; /* Current column; maintained by updateCol and newlines. */
+ char *lastnl; /* Last newline seen inside the current token, or 0. */
+
+ /* Set by machine statements, these persist from section to section
+ * allowing for unnamed sections. */
+ Parser *parser;
+ bool active;
+
+ /* This is set if ragel has already emitted an error stating that
+ * no section name has been seen and thus no parser exists. */
+ bool parserExistsError;
+ bool ragelDefOpen; /* True once <ragel_def> has been written for this section. */
+
+ /* This is for inline code. By default it is on. It goes off for
+ * statements and values in inline blocks which are parsed. */
+ bool whitespaceOn;
+};
+
+%%{
+ machine section_parse;
+ alphtype int;
+ write data;
+}%%
+
+void Scanner::init( )
+{ /* Initialize the section_parse machine using ragel's generated init code. */
+ %% write init;
+}
+
+/* Report whether a parser has been set by a machine statement. The first
+ * failure since parserExistsError was last cleared emits an error; later
+ * failures are silent. */
+bool Scanner::parserExists()
+{
+    bool exists = parser != 0;
+
+    if ( !exists && !parserExistsError ) {
+        error() << "include: there is no previous specification name" << endl;
+        parserExistsError = true;
+    }
+
+    return exists;
+}
+
+ostream &Scanner::error()
+{
+ /* Maintain the error count. */
+ gblErrorCount += 1;
+
+ cerr << fileName << ":" << line << ":" << column << ": ";
+ return cerr;
+}
+
+/* Return true if the given file/section pair already appears somewhere in
+ * the include stack, meaning that including it again would recurse. */
+bool Scanner::recursiveInclude( IncludeStack &includeStack,
+        char *inclFileName, char *inclSectionName )
+{
+    bool found = false;
+    for ( IncludeStack::Iter si = includeStack; !found && si.lte(); si++ ) {
+        /* Section names are only compared for entries from the same file. */
+        if ( strcmp( si->fileName, inclFileName ) != 0 )
+            continue;
+        found = strcmp( si->sectionName, inclSectionName ) == 0;
+    }
+    return found;
+}
+
+/* Advance column to the end of the current token. The distance is measured
+ * from the last newline recorded in lastnl if there is one, otherwise from
+ * the start of the token. */
+void Scanner::updateCol()
+{
+    char *origin = ( lastnl != 0 ) ? lastnl : tokstart;
+    column += tokend - origin;
+
+    /* The recorded newline has now been accounted for. */
+    lastnl = 0;
+}
+
+/* Send a token whose data is a C string. The data runs up to, but does not
+ * include, the first null character of string. */
+void Scanner::token( int type, char *string )
+{
+    char *end = string + strlen(string);
+    token( type, string, end );
+}
+
+void Scanner::token( int type )
+{ /* Send a token that carries no data. */
+ token( type, 0, 0 );
+}
+
+%%{
+ machine section_parse;
+
+ # This relies on the kelbt implementation and the order
+ # in which tokens are declared.
+ KW_Machine = 128;
+ KW_Include = 129;
+ KW_Write = 130;
+ TK_Word = 131;
+ TK_Literal = 132;
+
+ action clear_words { word = lit = 0; word_len = lit_len = 0; }
+ action store_word { word = tokdata; word_len = toklen; }
+ action store_lit { lit = tokdata; lit_len = toklen; }
+
+ action mach_err { error() << "bad machine statement" << endl; }
+ action incl_err { error() << "bad include statement" << endl; }
+ action write_err { error() << "bad write statement" << endl; }
+
+ action handle_machine
+ {
+     /* The word stored by the machine statement names the machine. */
+     char *machine = word;
+
+     if ( inclSectionTarg != 0 ) {
+         /* Scanning an included file: only the target section is live,
+          * and its tokens go to the including parser. Every other
+          * section is ignored. */
+         bool isTarget = strcmp( inclSectionTarg, machine ) == 0;
+         active = isTarget;
+         parser = isTarget ? inclToParser : 0;
+     }
+     else {
+         /* Not an include: every named machine is live. Reuse the parser
+          * registered under this name, or create and register one. */
+         active = true;
+
+         ParserDictEl *pdEl = parserDict.find( machine );
+         if ( pdEl == 0 ) {
+             pdEl = new ParserDictEl( machine );
+             pdEl->value = new Parser( fileName, machine, sectionLoc );
+             pdEl->value->init();
+             parserDict.insert( pdEl );
+         }
+
+         parser = pdEl->value;
+     }
+ }
+
+ machine_stmt =
+ ( KW_Machine TK_Word @store_word ';' ) @handle_machine
+ <>err mach_err <>eof mach_err;
+
+ action handle_include
+ {
+     /* Process an include statement: scan the named file (default: this
+      * file) and feed the named section (default: the current section)
+      * into the current parser. */
+     if ( active && parserExists() ) {
+         char *inclSectionName = word;
+         char *inclFileName = 0;
+
+         /* Implement defaults for the input file and section name. */
+         if ( inclSectionName == 0 )
+             inclSectionName = parser->sectionName;
+
+         if ( lit != 0 )
+             inclFileName = prepareFileName( lit, lit_len );
+         else
+             inclFileName = fileName;
+
+         /* Check for a recursive include structure. Add the current
+          * file/section name then check if what we are including is
+          * already in the stack. */
+         includeStack.append( IncludeStackItem( fileName, parser->sectionName ) );
+
+         if ( recursiveInclude( includeStack, inclFileName, inclSectionName ) )
+             error() << "include: this is a recursive include operation" << endl;
+         else {
+             /* Open the input file for reading. The stream lives on the
+              * stack so it is closed on every path out of this block. */
+             ifstream inFile( inclFileName );
+             if ( ! inFile.is_open() ) {
+                 error() << "include: could not open " <<
+                         inclFileName << " for reading" << endl;
+             }
+             else {
+                 /* Only scan a file that actually opened. */
+                 Scanner scanner( inclFileName, inFile, parser,
+                         inclSectionName, include_depth+1 );
+                 scanner.init();
+                 scanner.do_scan( );
+             }
+         }
+
+         /* Remove the last element (len-1) */
+         includeStack.remove( -1 );
+     }
+ }
+
+ include_names = (
+ TK_Word @store_word ( TK_Literal @store_lit )? |
+ TK_Literal @store_lit
+ ) >clear_words;
+
+ include_stmt =
+ ( KW_Include include_names ';' ) @handle_include
+ <>err incl_err <>eof incl_err;
+
+ action write_command
+ {
+ if ( active ) {
+ openRagelDef();
+ if ( strcmp( tokdata, "data" ) != 0 &&
+ strcmp( tokdata, "init" ) != 0 &&
+ strcmp( tokdata, "exec" ) != 0 &&
+ strcmp( tokdata, "eof" ) != 0 )
+ {
+ error() << "unknown write command" << endl;
+ }
+ *outStream << " <write what=\"" << tokdata << "\">";
+ }
+ }
+
+ action write_option
+ {
+ if ( active )
+ *outStream << "<option>" << tokdata << "</option>";
+ }
+ action write_close
+ {
+ if ( active )
+ *outStream << "</write>\n";
+ }
+
+ write_stmt =
+ ( KW_Write TK_Word @write_command
+ ( TK_Word @write_option )* ';' @write_close )
+ <>err write_err <>eof write_err;
+
+ action handle_token
+ {
+ /* Send the token off to the parser. */
+ if ( active && parserExists() ) {
+ InputLoc loc;
+
+ //cerr << "scanner:" << line << ":" << column <<
+ // ": sending token to the parser " << lelNames[*p];
+ //if ( tokdata != 0 )
+ // cerr << " " << tokdata;
+ //cerr << endl;
+
+ loc.fileName = fileName;
+ loc.line = line;
+ loc.col = column;
+
+ parser->token( loc, type, tokdata, toklen );
+ }
+ }
+
+ # Catch everything else.
+ everything_else = ^( KW_Machine | KW_Include | KW_Write ) @handle_token;
+
+ main := (
+ machine_stmt |
+ include_stmt |
+ write_stmt |
+ everything_else
+ )*;
+}%%
+
+void Scanner::token( int type, char *start, char *end )
+{ /* Feed one token through the section_parse machine. The machine runs
+ * over a single symbol, the token type; its actions see the token text
+ * as tokdata/toklen. A null start means the token carries no data. */
+ char *tokdata = 0;
+ int toklen = 0;
+ int *p = &type; /* Input of the machine: exactly one int, the type. */
+ int *pe = &type + 1;
+
+ if ( start != 0 ) { /* Make a null-terminated copy of [start, end). */
+ toklen = end-start;
+ tokdata = new char[toklen+1];
+ memcpy( tokdata, start, toklen );
+ tokdata[toklen] = 0;
+ }
+
+ %%{
+ machine section_parse;
+ write exec;
+ }%%
+
+ updateCol(); /* Move the column past the token just processed. */
+}
+
+void Scanner::startSection( )
+{
+ parserExistsError = false;
+
+ if ( include_depth == 0 ) {
+ if ( machineSpec == 0 && machineName == 0 )
+ *outStream << "</host>\n";
+ ragelDefOpen = false;
+ }
+
+ sectionLoc.fileName = fileName;
+ sectionLoc.line = line;
+ sectionLoc.col = 0;
+}
+
+/* Write the opening <ragel_def> tag for the current parser's section, once
+ * per section; later calls while it is open do nothing. */
+void Scanner::openRagelDef()
+{
+    if ( ragelDefOpen )
+        return;
+
+    ragelDefOpen = true;
+    *outStream << "<ragel_def name=\"" << parser->sectionName << "\">\n";
+}
+
+void Scanner::endSection( )
+{
+ /* Execute the eof actions for the section parser. These report a
+ * machine, include or write statement left unfinished at the end of
+ * the section. */
+ %%{
+ machine section_parse;
+ write eof;
+ }%%
+
+ /* Close off the section with the parser by sending it TK_EndSection. */
+ if ( active && parserExists() ) {
+ InputLoc loc;
+ loc.fileName = fileName;
+ loc.line = line;
+ loc.col = 0;
+
+ parser->token( loc, TK_EndSection, 0, 0 );
+ }
+
+ if ( include_depth == 0 ) { /* Output is written for the top-level file only. */
+ if ( ragelDefOpen ) {
+ *outStream << "</ragel_def>\n";
+ ragelDefOpen = false;
+ }
+
+ if ( machineSpec == 0 && machineName == 0 ) {
+ /* The end section may include a newline on the end, so
+ * we use the last line, which will count the newline. */
+ *outStream << "<host line=\"" << line << "\">";
+ }
+ }
+}
+
+%%{
+ machine rlscan;
+
+ # This is sent by the driver code.
+ EOF = 0;
+
+ action inc_nl {
+ lastnl = p;
+ column = 0;
+ line++;
+ }
+ NL = '\n' @inc_nl;
+
+ # Identifiers, numbers, comments, and other common things.
+ ident = ( alpha | '_' ) ( alpha |digit |'_' )*;
+ number = digit+;
+ hex_number = '0x' [0-9a-fA-F]+;
+
+ c_comment =
+ '/*' ( any | NL )* :>> '*/';
+
+ cpp_comment =
+ '//' [^\n]* NL;
+
+ c_cpp_comment = c_comment | cpp_comment;
+
+ # These literal forms are common to C-like host code and ragel.
+ s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'";
+ d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"';
+
+ whitespace = [ \t] | NL;
+ pound_comment = '#' [^\n]* NL;
+
+ # An inline block of code. This is specified as a scanner, but is sent to
+ # the parser as one long block. The inline_block pointer is used to handle
+ # the preservation of the data.
+ inline_code := |*
+ # Inline expression keywords.
+ "fpc" => { token( KW_PChar ); };
+ "fc" => { token( KW_Char ); };
+ "fcurs" => { token( KW_CurState ); };
+ "ftargs" => { token( KW_TargState ); };
+ "fentry" => {
+ whitespaceOn = false;
+ token( KW_Entry );
+ };
+
+ # Inline statement keywords.
+ "fhold" => {
+ whitespaceOn = false;
+ token( KW_Hold );
+ };
+ "fexec" => { token( KW_Exec, 0, 0 ); };
+ "fgoto" => {
+ whitespaceOn = false;
+ token( KW_Goto );
+ };
+ "fnext" => {
+ whitespaceOn = false;
+ token( KW_Next );
+ };
+ "fcall" => {
+ whitespaceOn = false;
+ token( KW_Call );
+ };
+ "fret" => {
+ whitespaceOn = false;
+ token( KW_Ret );
+ };
+ "fbreak" => {
+ whitespaceOn = false;
+ token( KW_Break );
+ };
+
+ ident => { token( TK_Word, tokstart, tokend ); };
+
+ number => { token( TK_UInt, tokstart, tokend ); };
+ hex_number => { token( TK_Hex, tokstart, tokend ); };
+
+ ( s_literal | d_literal )
+ => { token( IL_Literal, tokstart, tokend ); };
+
+ whitespace+ => {
+ if ( whitespaceOn )
+ token( IL_WhiteSpace, tokstart, tokend );
+ };
+ c_cpp_comment => { token( IL_Comment, tokstart, tokend ); };
+
+ "::" => { token( TK_NameSep, tokstart, tokend ); };
+
+ # Some symbols need to go to the parser with their cardinal value as
+ # the token type (as opposed to being sent as anonymous symbols)
+ # because they are part of the sequences which we interpret. The * ) ;
+ # symbols cause whitespace parsing to come back on. This gets turned
+ # off by some keywords.
+
+ ";" => {
+ whitespaceOn = true;
+ token( *tokstart, tokstart, tokend );
+ if ( inlineBlockType == SemiTerminated )
+ fgoto parser_def;
+ };
+
+ [*)] => {
+ whitespaceOn = true;
+ token( *tokstart, tokstart, tokend );
+ };
+
+ [,(] => { token( *tokstart, tokstart, tokend ); };
+
+ '{' => {
+ token( IL_Symbol, tokstart, tokend );
+ curly_count += 1;
+ };
+
+ '}' => {
+ if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) {
+ /* Inline code block ends. */
+ token( '}' );
+ fgoto parser_def;
+ }
+ else {
+ /* Either a semi terminated inline block or only the closing
+ * brace of some inner scope, not the block's closing brace. */
+ token( IL_Symbol, tokstart, tokend );
+ }
+ };
+
+ # Send every other character as a symbol.
+ any => { token( IL_Symbol, tokstart, tokend ); };
+ *|;
+
+ or_literal := |*
+     # Escape sequences in OR expressions. The null escape is sent with an
+     # explicit one-byte range: token( type, string ) measures its data
+     # with strlen and would send "\0" as empty data.
+     '\\0' => { char nul = '\0'; token( RE_Char, &nul, &nul + 1 ); };
+     '\\a' => { token( RE_Char, "\a" ); };
+     '\\b' => { token( RE_Char, "\b" ); };
+     '\\t' => { token( RE_Char, "\t" ); };
+     '\\n' => { token( RE_Char, "\n" ); };
+     '\\v' => { token( RE_Char, "\v" ); };
+     '\\f' => { token( RE_Char, "\f" ); };
+     '\\r' => { token( RE_Char, "\r" ); };
+     # A backslash-newline produces no token; only the column is updated.
+     '\\\n' => { updateCol(); };
+     # Any other escaped character stands for itself.
+     '\\' any => { token( RE_Char, tokstart+1, tokend ); };
+
+     # Range dash in an OR expression.
+     '-' => { token( RE_Dash, 0, 0 ); };
+
+     # Terminate an OR expression and return to the calling scanner.
+     ']' => { token( RE_SqClose ); fret; };
+
+     # Characters in an OR expression.
+     [^\]] => { token( RE_Char, tokstart, tokend ); };
+ *|;
+
+ re_literal := |*
+     # Escape sequences in regular expressions. The null escape is sent
+     # with an explicit one-byte range: token( type, string ) measures its
+     # data with strlen and would send "\0" as empty data.
+     '\\0' => { char nul = '\0'; token( RE_Char, &nul, &nul + 1 ); };
+     '\\a' => { token( RE_Char, "\a" ); };
+     '\\b' => { token( RE_Char, "\b" ); };
+     '\\t' => { token( RE_Char, "\t" ); };
+     '\\n' => { token( RE_Char, "\n" ); };
+     '\\v' => { token( RE_Char, "\v" ); };
+     '\\f' => { token( RE_Char, "\f" ); };
+     '\\r' => { token( RE_Char, "\r" ); };
+     # A backslash-newline produces no token; only the column is updated.
+     '\\\n' => { updateCol(); };
+     # Any other escaped character stands for itself.
+     '\\' any => { token( RE_Char, tokstart+1, tokend ); };
+
+     # Terminate a regular expression, with an optional trailing 'i'.
+     '/' [i]? => {
+         token( RE_Slash, tokstart, tokend );
+         fgoto parser_def;
+     };
+
+     # Special characters.
+     '.' => { token( RE_Dot ); };
+     '*' => { token( RE_Star ); };
+
+     # An OR expression nested in the regular expression.
+     '[' => { token( RE_SqOpen ); fcall or_literal; };
+     '[^' => { token( RE_SqOpenNeg ); fcall or_literal; };
+
+     # Characters in a regular expression.
+     [^\/] => { token( RE_Char, tokstart, tokend ); };
+ *|;
+
+ write_statement := |*
+ ident => { token( TK_Word, tokstart, tokend ); } ;
+ [ \t\n]+ => { updateCol(); };
+ ';' => { token( ';' ); fgoto parser_def; };
+ *|;
+
+ # Parser definitions.
+ parser_def := |*
+ 'machine' => { token( KW_Machine ); };
+ 'include' => { token( KW_Include ); };
+ 'write' => {
+ token( KW_Write );
+ fgoto write_statement;
+ };
+ 'action' => { token( KW_Action ); };
+ 'alphtype' => { token( KW_AlphType ); };
+ 'range' => { token( KW_Range ); };
+ 'getkey' => {
+ token( KW_GetKey );
+ inlineBlockType = SemiTerminated;
+ fgoto inline_code;
+ };
+ 'access' => {
+ token( KW_Access );
+ inlineBlockType = SemiTerminated;
+ fgoto inline_code;
+ };
+ 'variable' => {
+ token( KW_Variable );
+ inlineBlockType = SemiTerminated;
+ fgoto inline_code;
+ };
+ 'when' => { token( KW_When ); };
+ 'eof' => { token( KW_Eof ); };
+ 'err' => { token( KW_Err ); };
+ 'lerr' => { token( KW_Lerr ); };
+ 'to' => { token( KW_To ); };
+ 'from' => { token( KW_From ); };
+
+ # Identifiers.
+ ident => { token( TK_Word, tokstart, tokend ); } ;
+
+ # Numbers
+ number => { token( TK_UInt, tokstart, tokend ); };
+ hex_number => { token( TK_Hex, tokstart, tokend ); };
+
+ # Literals, with optionals.
+ ( s_literal | d_literal ) [i]?
+ => { token( TK_Literal, tokstart, tokend ); };
+
+ '[' => { token( RE_SqOpen ); fcall or_literal; };
+ '[^' => { token( RE_SqOpenNeg ); fcall or_literal; };
+
+ '/' => { token( RE_Slash ); fgoto re_literal; };
+
+ # Ignore.
+ pound_comment => { updateCol(); };
+
+ ':=' => { token( TK_ColonEquals ); };
+
+ # To State Actions.
+ ">~" => { token( TK_StartToState ); };
+ "$~" => { token( TK_AllToState ); };
+ "%~" => { token( TK_FinalToState ); };
+ "<~" => { token( TK_NotStartToState ); };
+ "@~" => { token( TK_NotFinalToState ); };
+ "<>~" => { token( TK_MiddleToState ); };
+
+ # From State actions
+ ">*" => { token( TK_StartFromState ); };
+ "$*" => { token( TK_AllFromState ); };
+ "%*" => { token( TK_FinalFromState ); };
+ "<*" => { token( TK_NotStartFromState ); };
+ "@*" => { token( TK_NotFinalFromState ); };
+ "<>*" => { token( TK_MiddleFromState ); };
+
+ # EOF Actions.
+ ">/" => { token( TK_StartEOF ); };
+ "$/" => { token( TK_AllEOF ); };
+ "%/" => { token( TK_FinalEOF ); };
+ "</" => { token( TK_NotStartEOF ); };
+ "@/" => { token( TK_NotFinalEOF ); };
+ "<>/" => { token( TK_MiddleEOF ); };
+
+ # Global Error actions.
+ ">!" => { token( TK_StartGblError ); };
+ "$!" => { token( TK_AllGblError ); };
+ "%!" => { token( TK_FinalGblError ); };
+ "<!" => { token( TK_NotStartGblError ); };
+ "@!" => { token( TK_NotFinalGblError ); };
+ "<>!" => { token( TK_MiddleGblError ); };
+
+ # Local error actions.
+ ">^" => { token( TK_StartLocalError ); };
+ "$^" => { token( TK_AllLocalError ); };
+ "%^" => { token( TK_FinalLocalError ); };
+ "<^" => { token( TK_NotStartLocalError ); };
+ "@^" => { token( TK_NotFinalLocalError ); };
+ "<>^" => { token( TK_MiddleLocalError ); };
+
+ # Middle.
+ "<>" => { token( TK_Middle ); };
+
+ # Conditions.
+ '>?' => { token( TK_StartCond ); };
+ '$?' => { token( TK_AllCond ); };
+ '%?' => { token( TK_LeavingCond ); };
+
+ '..' => { token( TK_DotDot ); };
+ '**' => { token( TK_StarStar ); };
+ '--' => { token( TK_DashDash ); };
+ '->' => { token( TK_Arrow ); };
+ '=>' => { token( TK_DoubleArrow ); };
+
+ ":>" => { token( TK_ColonGt ); };
+ ":>>" => { token( TK_ColonGtGt ); };
+ "<:" => { token( TK_LtColon ); };
+
+ # Opening of longest match.
+ "|*" => { token( TK_BarStar ); };
+
+ '}%%' => {
+ /* In order to generate anything we must be in the top level file
+ * and the current spec must be active and there must not have been
+ * any parse errors. */
+ updateCol();
+ endSection();
+ fgoto main;
+ };
+
+ [ \t]+ => { updateCol(); };
+
+ # If we are in a single line machine then newline may end the spec.
+ NL => {
+ updateCol();
+ if ( singleLineSpec ) {
+ /* In order to generate anything we must be in the top level file
+ * and the current spec must be active and there must not have been
+ * any parse errors. */
+ endSection();
+ fgoto main;
+ }
+ };
+
+ '{' => {
+ token( '{' );
+ curly_count = 1;
+ inlineBlockType = CurlyDelimited;
+ fgoto inline_code;
+ };
+
+ any => { token( *tokstart ); } ;
+ *|;
+
+ action pass {
+ updateCol();
+
+ /* If we are at the bottom of the include stack (the source file listed
+ * on the command line) and neither machineSpec nor machineName is set,
+ * then write out the data. The error count is not consulted here. */
+ if ( include_depth == 0 && machineSpec == 0 && machineName == 0 )
+ xmlEscapeHost( *outStream, tokstart, tokend-tokstart );
+ }
+
+ # Outside code scanner. These tokens get passed through.
+ main := |*
+ ident => pass;
+ number => pass;
+ c_cpp_comment => pass;
+ s_literal | d_literal => pass;
+ '%%{' => {
+ updateCol();
+ singleLineSpec = false;
+ startSection();
+ fgoto parser_def;
+ };
+ '%%' => {
+ updateCol();
+ singleLineSpec = true;
+ startSection();
+ fgoto parser_def;
+ };
+ whitespace+ => pass;
+ EOF;
+ any => pass;
+ *|;
+
+}%%
+
+%% write data;
+
+/* Run the rlscan scanner over the whole input stream. The input is read
+ * in blocks into a buffer that doubles whenever a single token fills it.
+ * Between blocks the partially scanned token, if any, is moved to the front
+ * of the buffer. At end of input one EOF character (0) is fed to the
+ * scanner. A scanner error is reported and terminates the program. */
+void Scanner::do_scan()
+{
+    int bufsize = 8;
+    char *buf = new char[bufsize];
+    const char last_char = 0;
+
+    /* Variables used by the generated rlscan code. Note that this cs is
+     * local and distinct from the member cs of the section_parse machine. */
+    int cs, act, have = 0;
+    int top, stack[1];
+
+    /* State shared between the scanner actions. inlineBlockType is always
+     * assigned before inline_code is entered; it is initialized here only
+     * so that it never holds an indeterminate value. */
+    int curly_count = 0;
+    bool execute = true;
+    bool singleLineSpec = false;
+    InlineBlockType inlineBlockType = CurlyDelimited;
+
+    %% write init;
+
+    while ( execute ) {
+        char *p = buf + have;
+        int space = bufsize - have;
+
+        if ( space == 0 ) {
+            /* We filled up the buffer trying to scan a token. Grow it. */
+            bufsize = bufsize * 2;
+            char *newbuf = new char[bufsize];
+
+            /* Recompute p and space. */
+            p = newbuf + have;
+            space = bufsize - have;
+
+            /* Patch up pointers possibly in use. Null pointers are left
+             * alone rather than being offset into garbage. */
+            if ( tokstart != 0 )
+                tokstart = newbuf + ( tokstart - buf );
+            if ( tokend != 0 )
+                tokend = newbuf + ( tokend - buf );
+
+            /* Copy the data over to the new buffer. */
+            memcpy( newbuf, buf, have );
+            delete[] buf;
+            buf = newbuf;
+        }
+
+        input.read( p, space );
+        int len = input.gcount();
+
+        /* If we see eof then append the EOF char. */
+        if ( len == 0 ) {
+            p[0] = last_char, len = 1;
+            execute = false;
+        }
+
+        char *pe = p + len;
+        %% write exec;
+
+        /* Check if we failed. */
+        if ( cs == rlscan_error ) {
+            /* Machine failed before finding a token. Say so instead of
+             * exiting silently. */
+            error() << "scanner error" << endl;
+            exit(1);
+        }
+
+        /* Decide if we need to preserve anything: a token in progress
+         * must be carried over into the next block. */
+        if ( tokstart == 0 )
+            have = 0;
+        else {
+            /* There is data that needs to be shifted over. */
+            unsigned int shiftback = tokstart - buf;
+            have = pe - tokstart;
+            memmove( buf, tokstart, have );
+
+            tokstart -= shiftback;
+            if ( tokend != 0 )
+                tokend -= shiftback;
+        }
+    }
+
+    delete[] buf;
+}
+
+void scan( char *fileName, istream &input )
+{ /* Scan a top-level input: no include parser or target, include depth 0. */
+ Scanner scanner( fileName, input, 0, 0, 0 );
+ scanner.init();
+ scanner.do_scan();
+}
+
diff --git a/ragel/xmlcodegen.cpp b/ragel/xmlcodegen.cpp
new file mode 100644
index 0000000..bc9c155
--- /dev/null
+++ b/ragel/xmlcodegen.cpp
@@ -0,0 +1,675 @@
+/*
+ * Copyright 2005, 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include "ragel.h"
+#include "xmlcodegen.h"
+#include "parsedata.h"
+#include "fsmgraph.h"
+#include <string.h>
+
+using namespace std;
+
+/* Bind the generator to the machine it writes and to the output stream.
+ * Action table ids are handed out starting from zero. */
+XMLCodeGen::XMLCodeGen( char *fsmName, ParseData *pd, FsmAp *fsm,
+		std::ostream &out )
+	: fsmName(fsmName), pd(pd), fsm(fsm), out(out), nextActionTableId(0)
+{
+}
+
+
+/* Emit <action_list>. Actions with at least one reference (plain or
+ * condition) are numbered consecutively from zero, then every action whose
+ * id is non-negative is written. */
+void XMLCodeGen::writeActionList()
+{
+	/* First pass: hand out ids to the actions that are referenced. */
+	int numActions = 0;
+	for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) {
+		bool referenced = act->numRefs() > 0 || act->numCondRefs > 0;
+		if ( referenced )
+			act->actionId = numActions++;
+	}
+
+	/* Second pass: write out everything that holds an id. */
+	out << " <action_list length=\"" << numActions << "\">\n";
+	for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) {
+		if ( act->actionId < 0 )
+			continue;
+		writeAction( act );
+	}
+	out << " </action_list>\n";
+}
+
+/* Emit <action_table_list>: one <action_table> per reduced table, in id
+ * order, each holding the space-separated ids of its actions. */
+void XMLCodeGen::writeActionTableList()
+{
+	/* Index the reduced tables by id so they can be written in id order. */
+	int tableCount = nextActionTableId;
+	RedActionTable **byId = new RedActionTable*[tableCount];
+	for ( ActionTableMap::Iter entry = actionTableMap; entry.lte(); entry++ )
+		byId[entry->id] = entry;
+
+	out << " <action_table_list length=\"" << tableCount << "\">\n";
+	for ( int id = 0; id < tableCount; id++ ) {
+		RedActionTable *table = byId[id];
+		out << " <action_table id=\"" << id << "\" length=\"" <<
+				table->key.length() << "\">";
+
+		/* Action ids, separated by single spaces. */
+		for ( ActionTable::Iter el = table->key; el.lte(); el++ ) {
+			out << el->value->actionId;
+			if ( ! el.last() )
+				out << " ";
+		}
+		out << "</action_table>\n";
+	}
+	out << " </action_table_list>\n";
+
+	delete[] byId;
+}
+
+/* Collect every non-empty action table used by the machine (to-state,
+ * from-state, EOF and per-transition) into actionTableMap. A table that is
+ * newly inserted receives the next free table id. */
+void XMLCodeGen::reduceActionTables()
+{
+	for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) {
+		/* Receives the map element from each insert attempt. */
+		RedActionTable *reduced = 0;
+
+		/* To-state actions. */
+		if ( st->toStateActionTable.length() > 0 &&
+				actionTableMap.insert( st->toStateActionTable, &reduced ) )
+			reduced->id = nextActionTableId++;
+
+		/* From-state actions. */
+		if ( st->fromStateActionTable.length() > 0 &&
+				actionTableMap.insert( st->fromStateActionTable, &reduced ) )
+			reduced->id = nextActionTableId++;
+
+		/* EOF actions. */
+		if ( st->eofActionTable.length() > 0 &&
+				actionTableMap.insert( st->eofActionTable, &reduced ) )
+			reduced->id = nextActionTableId++;
+
+		/* Actions on each outgoing transition. */
+		for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
+			if ( trans->actionTable.length() > 0 &&
+					actionTableMap.insert( trans->actionTable, &reduced ) )
+				reduced->id = nextActionTableId++;
+		}
+	}
+}
+
+/* Append the transition to outList unless it has neither a target state nor
+ * any actions. */
+void XMLCodeGen::appendTrans( TransListVect &outList, Key lowKey,
+		Key highKey, TransAp *trans )
+{
+	if ( trans->toState == 0 && trans->actionTable.length() <= 0 )
+		return;
+
+	outList.append( TransEl( lowKey, highKey, trans ) );
+}
+
+/* Write a key value; for an unsigned alphabet the value is cast to unsigned
+ * long before printing. */
+void XMLCodeGen::writeKey( Key key )
+{
+	if ( !keyOps->isSigned )
+		out << (unsigned long) key.getVal();
+	else
+		out << key.getVal();
+}
+
+/* Emit one <t> element: low key, high key, target state number and reduced
+ * action table id, with "x" in place of a missing target or action table. */
+void XMLCodeGen::writeTrans( Key lowKey, Key highKey, TransAp *trans )
+{
+	/* Look up the reduced table for the transition's actions, if any. */
+	RedActionTable *reduced = 0;
+	if ( trans->actionTable.length() > 0 )
+		reduced = actionTableMap.find( trans->actionTable );
+
+	/* Key range. */
+	out << " <t>";
+	writeKey( lowKey );
+	out << " ";
+	writeKey( highKey );
+
+	/* Target state. */
+	if ( trans->toState == 0 )
+		out << " x";
+	else
+		out << " " << trans->toState->alg.stateNum;
+
+	/* Action table. */
+	if ( reduced == 0 )
+		out << " x";
+	else
+		out << " " << reduced->id;
+
+	out << "</t>\n";
+}
+
+/* Emit <trans_list> for a state: every outgoing transition that survives
+ * appendTrans, in out-list order. */
+void XMLCodeGen::writeTransList( StateAp *state )
+{
+	TransListVect kept;
+
+	/* Offer each range of the out list to appendTrans; an empty out list
+	 * simply contributes nothing. */
+	for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
+		appendTrans( kept, trans->lowKey, trans->highKey, trans );
+
+	out << " <trans_list length=\"" << kept.length() << "\">\n";
+	for ( TransListVect::Iter el = kept; el.lte(); el++ )
+		writeTrans( el->lowKey, el->highKey, el->value );
+	out << " </trans_list>\n";
+}
+
+/* Emit the <lm_switch> element of a longest-match construct: one
+ * <sub_action> for each part that is flagged inLmSelect and carries an
+ * action, followed by an exec of the token end. */
+void XMLCodeGen::writeLmSwitch( InlineItem *item )
+{
+	LongestMatch *lm = item->longestMatch;
+
+	out << "<lm_switch";
+	if ( lm->lmSwitchHandlesError )
+		out << " handles_error=\"t\"";
+	out << ">\n";
+
+	for ( LmPartList::Iter part = *lm->longestMatchList; part.lte(); part++ ) {
+		if ( !part->inLmSelect || part->action == 0 )
+			continue;
+
+		/* The switch item is handed down as the context, which is what
+		 * makes control flow changes inside the action set up _p. */
+		out << " <sub_action id=\"" << part->longestMatchId << "\">";
+		writeInlineList( part->action->inlineList, item );
+		out << "</sub_action>\n";
+	}
+
+	out << " </lm_switch><exec><get_tokend></get_tokend></exec>";
+}
+
+/* Write a text item, escaped for XML. Runs of adjacent text items share one
+ * <text> element: the tag is opened by the first item of a run and closed by
+ * the last. */
+void XMLCodeGen::writeText( InlineItem *item )
+{
+	bool startsRun = item->prev == 0 || item->prev->type != InlineItem::Text;
+	bool endsRun = item->next == 0 || item->next->type != InlineItem::Text;
+
+	if ( startsRun )
+		out << "<text>";
+
+	xmlEscapeHost( out, item->data, strlen(item->data) );
+
+	if ( endsRun )
+		out << "</text>";
+}
+
+/* Write a control flow item (goto, call, next, break, ret and the expression
+ * forms). When a context item is given the output is wrapped in
+ * <sub_action>, and for the longest-match contexts it is preceded by an exec
+ * of the token end. */
+void XMLCodeGen::writeCtrlFlow( InlineItem *item, InlineItem *context )
+{
+	const bool wrapped = context != 0;
+
+	if ( wrapped ) {
+		out << "<sub_action>";
+
+		/* All four longest-match contexts get the same prefix. */
+		switch ( context->type ) {
+		case InlineItem::LmOnLast:
+		case InlineItem::LmOnNext:
+		case InlineItem::LmOnLagBehind:
+		case InlineItem::LmSwitch:
+			out << "<exec><get_tokend></get_tokend></exec>";
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* Dispatch on the kind of control flow statement. */
+	switch ( item->type ) {
+	case InlineItem::Goto:     writeGoto( item, context ); break;
+	case InlineItem::GotoExpr: writeGotoExpr( item, context ); break;
+	case InlineItem::Call:     writeCall( item, context ); break;
+	case InlineItem::CallExpr: writeCallExpr( item, context ); break;
+	case InlineItem::Next:     writeNext( item, context ); break;
+	case InlineItem::NextExpr: writeNextExpr( item, context ); break;
+	case InlineItem::Break:    out << "<break></break>"; break;
+	case InlineItem::Ret:      out << "<ret></ret>"; break;
+	default: break;
+	}
+
+	if ( wrapped )
+		out << "</sub_action>";
+}
+
+/* Write a hold or exec item. Inside an LmOnNext, LmOnLagBehind or LmSwitch
+ * context the token-end forms (<holdte>, <execte>) are written; everywhere
+ * else the plain forms are. Other item types produce no output. */
+void XMLCodeGen::writePtrMod( InlineItem *item, InlineItem *context )
+{
+	bool tokendForm = context != 0 && (
+			context->type == InlineItem::LmOnNext ||
+			context->type == InlineItem::LmOnLagBehind ||
+			context->type == InlineItem::LmSwitch );
+
+	switch ( item->type ) {
+	case InlineItem::Hold:
+		if ( tokendForm )
+			out << "<holdte></holdte>";
+		else
+			out << "<hold></hold>";
+		break;
+	case InlineItem::Exec:
+		if ( tokendForm )
+			writeActionExecTE( item );
+		else
+			writeActionExec( item );
+		break;
+	default:
+		break;
+	}
+}
+
+
+/* Emit <goto> with the state number of the named entry point, or -1 while a
+ * section subset is being generated. The context argument is not used. */
+void XMLCodeGen::writeGoto( InlineItem *item, InlineItem *context )
+{
+	if ( !pd->generatingSectionSubset ) {
+		EntryMapEl *entry = fsm->entryPoints.find( item->nameTarg->id );
+		out << "<goto>" << entry->value->alg.stateNum << "</goto>";
+		return;
+	}
+
+	out << "<goto>-1</goto>";
+}
+
+/* Emit <call> with the state number of the named entry point, or -1 while a
+ * section subset is being generated. The context argument is not used. */
+void XMLCodeGen::writeCall( InlineItem *item, InlineItem *context )
+{
+	if ( !pd->generatingSectionSubset ) {
+		EntryMapEl *entry = fsm->entryPoints.find( item->nameTarg->id );
+		out << "<call>" << entry->value->alg.stateNum << "</call>";
+		return;
+	}
+
+	out << "<call>-1</call>";
+}
+
+/* Emit <next> with the state number of the named entry point, or -1 while a
+ * section subset is being generated. The context argument is not used. */
+void XMLCodeGen::writeNext( InlineItem *item, InlineItem *context )
+{
+	if ( !pd->generatingSectionSubset ) {
+		EntryMapEl *entry = fsm->entryPoints.find( item->nameTarg->id );
+		out << "<next>" << entry->value->alg.stateNum << "</next>";
+		return;
+	}
+
+	out << "<next>-1</next>";
+}
+
+/* Emit <goto_expr> around the item's child inline list, which is written
+ * without a context item. The context argument is not used. */
+void XMLCodeGen::writeGotoExpr( InlineItem *item, InlineItem *context )
+{
+ out << "<goto_expr>";
+ writeInlineList( item->children, 0 );
+ out << "</goto_expr>";
+}
+
+/* Emit <call_expr> around the item's child inline list, which is written
+ * without a context item. The context argument is not used. */
+void XMLCodeGen::writeCallExpr( InlineItem *item, InlineItem *context )
+{
+ out << "<call_expr>";
+ writeInlineList( item->children, 0 );
+ out << "</call_expr>";
+}
+
+/* Emit <next_expr> around the item's child inline list, which is written
+ * without a context item. The context argument is not used. */
+void XMLCodeGen::writeNextExpr( InlineItem *item, InlineItem *context )
+{
+ out << "<next_expr>";
+ writeInlineList( item->children, 0 );
+ out << "</next_expr>";
+}
+
+/* Emit <entry> with the state number of the named entry point, or -1 while a
+ * section subset is being generated. */
+void XMLCodeGen::writeEntry( InlineItem * item )
+{
+	if ( !pd->generatingSectionSubset ) {
+		EntryMapEl *entry = fsm->entryPoints.find( item->nameTarg->id );
+		out << "<entry>" << entry->value->alg.stateNum << "</entry>";
+		return;
+	}
+
+	out << "<entry>-1</entry>";
+}
+
+/* Emit <exec> around the item's child inline list (written with no context). */
+void XMLCodeGen::writeActionExec( InlineItem *item )
+{
+ out << "<exec>";
+ writeInlineList( item->children, 0 );
+ out << "</exec>";
+}
+
+/* Emit <execte> around the item's child inline list (written with no
+ * context). writePtrMod picks this form inside longest-match contexts. */
+void XMLCodeGen::writeActionExecTE( InlineItem *item )
+{
+ out << "<execte>";
+ writeInlineList( item->children, 0 );
+ out << "</execte>";
+}
+
+/* Longest-match "on last" item: <set_tokend>1, then the part's action (if it
+ * has one) as a <sub_action> with this item as context, then an exec of the
+ * token end. */
+void XMLCodeGen::writeLmOnLast( InlineItem *item )
+{
+	out << "<set_tokend>1</set_tokend>";
+
+	const bool hasAction = item->longestMatchPart->action != 0;
+	if ( hasAction ) {
+		out << "<sub_action>";
+		writeInlineList( item->longestMatchPart->action->inlineList, item );
+		out << "</sub_action>";
+	}
+
+	out << "<exec><get_tokend></get_tokend></exec>";
+}
+
+/* Longest-match "on next" item: <set_tokend>0, then the part's action (if it
+ * has one) as a <sub_action> with this item as context, then an exec of the
+ * token end. */
+void XMLCodeGen::writeLmOnNext( InlineItem *item )
+{
+	out << "<set_tokend>0</set_tokend>";
+
+	const bool hasAction = item->longestMatchPart->action != 0;
+	if ( hasAction ) {
+		out << "<sub_action>";
+		writeInlineList( item->longestMatchPart->action->inlineList, item );
+		out << "</sub_action>";
+	}
+
+	out << "<exec><get_tokend></get_tokend></exec>";
+}
+
+/* Longest-match "lag behind" item: the part's action (if it has one) as a
+ * <sub_action> with this item as context, then an exec of the token end.
+ * Unlike the on-last and on-next forms, no <set_tokend> is written. */
+void XMLCodeGen::writeLmOnLagBehind( InlineItem *item )
+{
+	const bool hasAction = item->longestMatchPart->action != 0;
+	if ( hasAction ) {
+		out << "<sub_action>";
+		writeInlineList( item->longestMatchPart->action->inlineList, item );
+		out << "</sub_action>";
+	}
+
+	out << "<exec><get_tokend></get_tokend></exec>";
+}
+
+
+/* Write every item of an inline list as XML, dispatching on the item type.
+ * `context` is the enclosing longest-match item (or 0) and is passed on to
+ * the control flow and pointer-modification writers. The switch has no
+ * default case, so an item type not listed here produces no output. */
+void XMLCodeGen::writeInlineList( InlineList *inlineList, InlineItem *context )
+{
+ for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+ switch ( item->type ) {
+ case InlineItem::Text:
+ writeText( item );
+ break;
+ /* Control flow statements share one writer. */
+ case InlineItem::Goto: case InlineItem::GotoExpr:
+ case InlineItem::Call: case InlineItem::CallExpr:
+ case InlineItem::Next: case InlineItem::NextExpr:
+ case InlineItem::Break: case InlineItem::Ret:
+ writeCtrlFlow( item, context );
+ break;
+ case InlineItem::PChar:
+ out << "<pchar></pchar>";
+ break;
+ case InlineItem::Char:
+ out << "<char></char>";
+ break;
+ case InlineItem::Curs:
+ out << "<curs></curs>";
+ break;
+ case InlineItem::Targs:
+ out << "<targs></targs>";
+ break;
+ case InlineItem::Entry:
+ writeEntry( item );
+ break;
+
+ /* Hold and exec are written differently depending on the context. */
+ case InlineItem::Hold:
+ case InlineItem::Exec:
+ writePtrMod( item, context );
+ break;
+
+ /* Longest-match items. */
+ case InlineItem::LmSwitch:
+ writeLmSwitch( item );
+ break;
+ case InlineItem::LmSetActId:
+ out << "<set_act>" <<
+ item->longestMatchPart->longestMatchId <<
+ "</set_act>";
+ break;
+ case InlineItem::LmSetTokEnd:
+ out << "<set_tokend>1</set_tokend>";
+ break;
+ case InlineItem::LmOnLast:
+ writeLmOnLast( item );
+ break;
+ case InlineItem::LmOnNext:
+ writeLmOnNext( item );
+ break;
+ case InlineItem::LmOnLagBehind:
+ writeLmOnLagBehind( item );
+ break;
+ case InlineItem::LmInitAct:
+ out << "<init_act></init_act>";
+ break;
+ case InlineItem::LmInitTokStart:
+ out << "<init_tokstart></init_tokstart>";
+ break;
+ case InlineItem::LmSetTokStart:
+ out << "<set_tokstart></set_tokstart>";
+ break;
+ }
+ }
+}
+
+/* Emit one <action> element: id, optional name, source line and column as
+ * attributes, and the action's inline list (no context) as content. */
+void XMLCodeGen::writeAction( Action *action )
+{
+	/* Opening tag; the name attribute is left out for unnamed actions. */
+	out << " <action id=\"" << action->actionId << "\"";
+	if ( action->name != 0 )
+		out << " name=\"" << action->name << "\"";
+	out << " line=\"" << action->loc.line <<
+			"\" col=\"" << action->loc.col << "\">";
+
+	writeInlineList( action->inlineList, 0 );
+
+	out << "</action>\n";
+}
+
+/* Write len bytes of data to out, replacing '<', '>' and '&' with their XML
+ * entities. All other bytes are written unchanged. */
+void xmlEscapeHost( std::ostream &out, char *data, int len )
+{
+	for ( int pos = 0; pos < len; pos++ ) {
+		const char ch = data[pos];
+		if ( ch == '<' )
+			out << "&lt;";
+		else if ( ch == '>' )
+			out << "&gt;";
+		else if ( ch == '&' )
+			out << "&amp;";
+		else
+			out << ch;
+	}
+}
+
+/* Emit <state_actions> for a state: the ids of its reduced to-state,
+ * from-state and EOF action tables, in that order and separated by spaces,
+ * with "x" standing in for a table the state does not have. Nothing is
+ * written when the state has none of the three. */
+void XMLCodeGen::writeStateActions( StateAp *state )
+{
+	/* Look up the reduced table for each non-empty action table. */
+	RedActionTable *toStateActions = 0;
+	if ( state->toStateActionTable.length() > 0 )
+		toStateActions = actionTableMap.find( state->toStateActionTable );
+
+	RedActionTable *fromStateActions = 0;
+	if ( state->fromStateActionTable.length() > 0 )
+		fromStateActions = actionTableMap.find( state->fromStateActionTable );
+
+	RedActionTable *eofActions = 0;
+	if ( state->eofActionTable.length() > 0 )
+		eofActions = actionTableMap.find( state->eofActionTable );
+
+	if ( toStateActions == 0 && fromStateActions == 0 && eofActions == 0 )
+		return;
+
+	out << " <state_actions>";
+
+	if ( toStateActions != 0 )
+		out << toStateActions->id;
+	else
+		out << "x";
+
+	if ( fromStateActions != 0 )
+		out << " " << fromStateActions->id;
+	else
+		out << " x";
+
+	if ( eofActions != 0 )
+		out << " " << eofActions->id;
+	else
+		out << " x";
+
+	/* The closing tag is always written. It used to share a source line
+	 * with the else branch above, which made it read as conditional. */
+	out << "</state_actions>\n";
+}
+
+/* Emit <cond_list> for a state: one <c> per entry of its condition list,
+ * holding low key, high key and condition space id. States with no
+ * conditions produce no output. */
+void XMLCodeGen::writeStateConditions( StateAp *state )
+{
+	if ( state->stateCondList.length() <= 0 )
+		return;
+
+	out << " <cond_list length=\"" << state->stateCondList.length() << "\">\n";
+	for ( StateCondList::Iter cond = state->stateCondList; cond.lte(); cond++ ) {
+		out << " <c>";
+		writeKey( cond->lowKey );
+		out << " ";
+		writeKey( cond->highKey );
+		out << " ";
+		out << cond->condSpace->condSpaceId;
+		out << "</c>\n";
+	}
+	out << " </cond_list>\n";
+}
+
+/* Emit <state_list>: one <state> per machine state carrying its number, a
+ * final="t" attribute for final states, and its actions, conditions and
+ * transitions. Consecutive states are separated by a blank line. */
+void XMLCodeGen::writeStateList()
+{
+	out << " <state_list length=\"" << fsm->stateList.length() << "\">\n";
+
+	for ( StateList::Iter state = fsm->stateList; state.lte(); state++ ) {
+		/* Opening tag. */
+		out << " <state id=\"" << state->alg.stateNum << "\"";
+		if ( state->isFinState() )
+			out << " final=\"t\"";
+		out << ">\n";
+
+		/* Contents. */
+		writeStateActions( state );
+		writeStateConditions( state );
+		writeTransList( state );
+
+		out << " </state>\n";
+
+		/* Blank line between states, but not after the last one. */
+		if ( !state.last() )
+			out << "\n";
+	}
+
+	out << " </state_list>\n";
+}
+
+/* Emit <entry_points>: the name and state number of every entry in the
+ * machine's entry point map, plus an error="t" attribute when
+ * pd->lmRequiresErrorState is set. Nothing is written when there are no
+ * entry points and that flag is clear. */
+void XMLCodeGen::writeEntryPoints()
+{
+	if ( fsm->entryPoints.length() <= 0 && !pd->lmRequiresErrorState )
+		return;
+
+	out << " <entry_points";
+	if ( pd->lmRequiresErrorState )
+		out << " error=\"t\"";
+	out << ">\n";
+
+	for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) {
+		/* The map key indexes the name instantiation in nameIndex. */
+		NameInst *inst = pd->nameIndex[en->key];
+		StateAp *target = en->value;
+		out << " <entry name=\"" << inst->name << "\">" <<
+				target->alg.stateNum << "</entry>\n";
+	}
+
+	out << " </entry_points>\n";
+}
+
+/* Emit the <machine> element: action list, action table list, conditions,
+ * start state, entry points and the state list, in that order. */
+void XMLCodeGen::writeMachine()
+{
+ /* State numbers are written by most of the sections below. */
+ fsm->setStateNumbers();
+
+ /* Open the machine. */
+ out << " <machine>\n";
+
+ /* Action tables. */
+ /* Runs before any writer that looks tables up in actionTableMap. */
+ reduceActionTables();
+
+ /* writeActionList assigns the action ids that writeActionTableList and
+ * writeConditions print. */
+ writeActionList();
+ writeActionTableList();
+ writeConditions();
+
+ /* Start state. */
+ out << " <start_state>" << fsm->startState->alg.stateNum <<
+ "</start_state>\n";
+
+ writeEntryPoints();
+ writeStateList();
+
+ out << " </machine>\n";
+}
+
+/* Emit <alphtype>: the alphabet type written as the pointer difference
+ * between keyOps->alphType and hostLang->hostTypes. */
+void XMLCodeGen::writeAlphType()
+{
+ out << " <alphtype>" <<
+ (keyOps->alphType - hostLang->hostTypes) << "</alphtype>\n";
+}
+
+/* Emit <getkey> around pd->getKeyExpr. The caller (writeXML) checks that the
+ * expression is non-null. */
+void XMLCodeGen::writeGetKeyExpr()
+{
+ out << " <getkey>";
+ writeInlineList( pd->getKeyExpr, 0 );
+ out << "</getkey>\n";
+}
+
+/* Emit <access> around pd->accessExpr. The caller (writeXML) checks that the
+ * expression is non-null. */
+void XMLCodeGen::writeAccessExpr()
+{
+ out << " <access>";
+ writeInlineList( pd->accessExpr, 0 );
+ out << "</access>\n";
+}
+
+/* Emit <curstate> around pd->curStateExpr. The caller (writeXML) checks that
+ * the expression is non-null. */
+void XMLCodeGen::writeCurStateExpr()
+{
+ out << " <curstate>";
+ writeInlineList( pd->curStateExpr, 0 );
+ out << "</curstate>\n";
+}
+
+/* Emit <cond_space_list>. Condition spaces are numbered consecutively from
+ * zero in map order; each <cond_space> holds its base key followed by the
+ * action ids of its condition set. Nothing is written when the map is
+ * empty. */
+void XMLCodeGen::writeConditions()
+{
+	if ( condData->condSpaceMap.length() <= 0 )
+		return;
+
+	/* Hand out the ids first. */
+	long spaceId = 0;
+	for ( CondSpaceMap::Iter space = condData->condSpaceMap; space.lte(); space++ )
+		space->condSpaceId = spaceId++;
+
+	out << " <cond_space_list length=\"" << condData->condSpaceMap.length() << "\">\n";
+	for ( CondSpaceMap::Iter space = condData->condSpaceMap; space.lte(); space++ ) {
+		out << " <cond_space id=\"" << space->condSpaceId <<
+				"\" length=\"" << space->condSet.length() << "\">";
+		writeKey( space->baseKey );
+		for ( CondSet::Iter cond = space->condSet; cond.lte(); cond++ )
+			out << " " << (*cond)->actionId;
+		out << "</cond_space>\n";
+	}
+	out << " </cond_space_list>\n";
+}
+
+/* Public entry point: write the complete <ragel_def> element for the
+ * machine: alphabet type, whichever of the getkey/access/curstate
+ * expressions are set, then the machine itself. */
+void XMLCodeGen::writeXML()
+{
+ /* Open the definition. */
+ out << "<ragel_def name=\"" << fsmName << "\">\n";
+ writeAlphType();
+
+ /* Each optional expression is written only when it is set. */
+ if ( pd->getKeyExpr != 0 )
+ writeGetKeyExpr();
+
+ if ( pd->accessExpr != 0 )
+ writeAccessExpr();
+
+ if ( pd->curStateExpr != 0 )
+ writeCurStateExpr();
+
+ writeMachine();
+
+ out <<
+ "</ragel_def>\n";
+}
+
diff --git a/ragel/xmlcodegen.h b/ragel/xmlcodegen.h
new file mode 100644
index 0000000..ab08bc2
--- /dev/null
+++ b/ragel/xmlcodegen.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2005, 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _XMLDOTGEN_H
+#define _XMLDOTGEN_H
+
+#include <iostream>
+#include "avltree.h"
+#include "fsmgraph.h"
+#include "parsedata.h"
+
+/* Forwards. */
+struct TransAp;
+struct FsmAp;
+struct ParseData;
+
+/* An action table held in an AVL tree (see the ActionTableMap typedef): the
+ * table itself is the key, and id is the number attached to it. */
+struct RedActionTable
+:
+ public AvlTreeEl<RedActionTable>
+{
+ /* Stores a copy of the table; the id starts out as 0. */
+ RedActionTable( const ActionTable &key )
+ :
+ key(key),
+ id(0)
+ { }
+
+ /* Key accessor (presumably the hook the AVL tree uses to compare
+ * elements -- confirm against avltree.h). */
+ const ActionTable &getKey()
+ { return key; }
+
+ ActionTable key;
+ int id;
+};
+
+typedef AvlTree<RedActionTable, ActionTable, CmpActionTable> ActionTableMap;
+
+/* Cursor over a chain of transitions linked through TransAp::next. Each time
+ * a non-null transition is loaded, its key range is cached in lowKey/highKey
+ * and its successor in next. */
+struct NextRedTrans
+{
+	Key lowKey, highKey;
+	TransAp *trans;
+	TransAp *next;
+
+	/* Refresh the cached fields from trans. A null trans leaves them
+	 * untouched. */
+	void load() {
+		if ( trans != 0 ) {
+			next = trans->next;
+			lowKey = trans->lowKey;
+			highKey = trans->highKey;
+		}
+	}
+
+	/* Start at t. next begins as null, so constructing from a null
+	 * transition and then calling increment() copies a null pointer
+	 * instead of an indeterminate one. */
+	NextRedTrans( TransAp *t )
+		: trans(t), next(0)
+	{
+		load();
+	}
+
+	/* Step to the successor recorded by the last load. */
+	void increment() {
+		trans = next;
+		load();
+	}
+};
+
+/* Writes a machine and its parse data to an output stream as XML. The only
+ * public operation is writeXML(); everything else is a helper that emits one
+ * element or one kind of inline item. */
+class XMLCodeGen
+{
+public:
+ XMLCodeGen( char *fsmName, ParseData *pd, FsmAp *fsm, std::ostream &out );
+ /* Write the complete <ragel_def> element. */
+ void writeXML( );
+
+private:
+ /* Per-state output. */
+ void appendTrans( TransListVect &outList, Key lowKey, Key highKey, TransAp *trans );
+ void writeStateActions( StateAp *state );
+ void writeStateList();
+ void writeStateConditions( StateAp *state );
+
+ /* Inline item writers. `context` is the enclosing longest-match item, or
+ * 0 when there is none. */
+ void writeKey( Key key );
+ void writeText( InlineItem *item );
+ void writeCtrlFlow( InlineItem *item, InlineItem *context );
+ void writePtrMod( InlineItem *item, InlineItem *context );
+ void writeGoto( InlineItem *item, InlineItem *context );
+ void writeGotoExpr( InlineItem *item, InlineItem *context );
+ void writeCall( InlineItem *item, InlineItem *context );
+ void writeCallExpr( InlineItem *item, InlineItem *context );
+ void writeNext( InlineItem *item, InlineItem *context );
+ void writeNextExpr( InlineItem *item, InlineItem *context );
+ void writeEntry( InlineItem *item );
+ /* NOTE(review): xmlcodegen.cpp as added alongside this header contains no
+ * definition of writeLmSetActId -- confirm whether this declaration is
+ * still needed. */
+ void writeLmSetActId( InlineItem *item );
+ void writeLmOnLast( InlineItem *item );
+ void writeLmOnNext( InlineItem *item );
+ void writeLmOnLagBehind( InlineItem *item );
+
+ /* Top-level sections of the definition and of the machine. */
+ void writeEntryPoints();
+ void writeGetKeyExpr();
+ void writeAccessExpr();
+ void writeCurStateExpr();
+ void writeConditions();
+ void writeInlineList( InlineList *inlineList, InlineItem *context );
+ void writeAlphType();
+ void writeActionList();
+ void writeActionTableList();
+ /* NOTE(review): reduceTrans likewise has no definition in xmlcodegen.cpp
+ * as added alongside this header -- confirm whether it is still needed. */
+ void reduceTrans( TransAp *trans );
+ void reduceActionTables();
+ void writeTransList( StateAp *state );
+ void writeTrans( Key lowKey, Key highKey, TransAp *defTrans );
+ void writeAction( Action *action );
+ void writeLmSwitch( InlineItem *item );
+ void writeMachine();
+ void writeActionExec( InlineItem *item );
+ void writeActionExecTE( InlineItem *item );
+
+ /* Name written into <ragel_def name="...">. */
+ char *fsmName;
+ ParseData *pd;
+ FsmAp *fsm;
+ std::ostream &out;
+ /* Set of distinct action tables, filled by reduceActionTables. */
+ ActionTableMap actionTableMap;
+ /* Next id to hand out to a newly inserted action table. */
+ int nextActionTableId;
+};
+
+
+#endif /* _XMLDOTGEN_H */
diff --git a/rlcodegen/Makefile.in b/rlcodegen/Makefile.in
new file mode 100644
index 0000000..c6a9838
--- /dev/null
+++ b/rlcodegen/Makefile.in
@@ -0,0 +1,93 @@
+#
+# Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+#
+
+# This file is part of Ragel.
+#
+# Ragel is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Ragel is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ragel; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# Header search paths and preprocessor definitions.
+INCS += -I../common -I../aapl
+DEFS +=
+
+CFLAGS += -g -Wall
+LDFLAGS +=
+
+# Every C++ source that is compiled into rlcodegen.
+CC_SRCS = \
+	gendata.cpp xmltags.cpp xmlscan.cpp xmlparse.cpp \
+	main.cpp redfsm.cpp gvdotgen.cpp fsmcodegen.cpp \
+	tabcodegen.cpp ftabcodegen.cpp flatcodegen.cpp \
+	fflatcodegen.cpp gotocodegen.cpp fgotocodegen.cpp \
+	ipgotocodegen.cpp splitcodegen.cpp javacodegen.cpp
+
+# Sources produced by kelbt, ragel and gperf (see the rules below).
+GEN_SRC = xmltags.cpp xmlscan.cpp xmlparse.cpp xmlparse.h
+
+LIBS += @LIBS@
+PREFIX += @prefix@
+
+# When "true", the generated sources are rebuilt from their inputs and are
+# removed by "make clean".
+BUILD_PARSERS = @BUILD_PARSERS@
+
+#*************************************
+
+# Programs
+CXX = @CXX@
+
+# Get objects and dependencies from sources.
+OBJS = $(CC_SRCS:%.cpp=%.o)
+DEPS = $(CC_SRCS:%.cpp=.%.d)
+
+# Get the version info.
+include ../version.mk
+
+# These targets name actions, not files; declaring them phony keeps a stray
+# file called "all", "clean", "distclean" or "install" from masking them.
+.PHONY: all clean distclean install
+
+# Rules.
+all: rlcodegen
+
+rlcodegen: $(GEN_SRC) $(OBJS)
+	$(CXX) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
+
+ifeq ($(BUILD_PARSERS),true)
+
+xmlparse.h: xmlparse.kh
+	kelbt -o $@ $<
+
+xmlparse.cpp: xmlparse.kl xmlparse.kh
+	kelbt -o $@ $<
+
+# Needs ragel and an existing rlcodegen on the PATH.
+xmlscan.cpp: xmlscan.rl
+	ragel xmlscan.rl | rlcodegen -G2 -o xmlscan.cpp
+
+xmltags.cpp: xmltags.gperf
+	gperf -L C++ -t $< > $@
+
+endif
+
+# Compile one source, first writing its dependency list to .<name>.d.
+%.o: %.cpp
+	@$(CXX) -M $(DEFS) $(INCS) $< > .$*.d
+	$(CXX) -c $(CFLAGS) $(DEFS) $(INCS) -o $@ $<
+
+distclean: clean
+	rm -f Makefile
+
+ifeq ($(BUILD_PARSERS),true)
+EXTRA_CLEAN = $(GEN_SRC)
+endif
+
+clean:
+	rm -f tags .*.d *.o rlcodegen $(EXTRA_CLEAN)
+
+# DESTDIR is empty unless set by the caller, so a plain "make install" still
+# installs under $(PREFIX); packagers can point it at a staging root.
+install: all
+	install -d $(DESTDIR)$(PREFIX)/bin
+	install -s rlcodegen $(DESTDIR)$(PREFIX)/bin/rlcodegen
+
+-include $(DEPS)
diff --git a/rlcodegen/fflatcodegen.cpp b/rlcodegen/fflatcodegen.cpp
new file mode 100644
index 0000000..25f4d24
--- /dev/null
+++ b/rlcodegen/fflatcodegen.cpp
@@ -0,0 +1,364 @@
+/*
+ * Copyright 2004-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Eric Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlcodegen.h"
+#include "fflatcodegen.h"
+#include "redfsm.h"
+#include "gendata.h"
+
+/* Write the to-state action table entry for a state: the action list id
+ * plus one, or 0 when the state has no to-state action. */
+std::ostream &FFlatCodeGen::TO_STATE_ACTION( RedStateAp *state )
+{
+	int entry = 0;
+	if ( state->toStateAction != 0 )
+		entry = state->toStateAction->actListId+1;
+	return out << entry;
+}
+
+/* Write the from-state action table entry for a state: the action list id
+ * plus one, or 0 when the state has no from-state action. */
+std::ostream &FFlatCodeGen::FROM_STATE_ACTION( RedStateAp *state )
+{
+	int entry = 0;
+	if ( state->fromStateAction != 0 )
+		entry = state->fromStateAction->actListId+1;
+	return out << entry;
+}
+
+/* Write the EOF action table entry for a state: the action list id plus one,
+ * or 0 when the state has no EOF action. */
+std::ostream &FFlatCodeGen::EOF_ACTION( RedStateAp *state )
+{
+	int entry = 0;
+	if ( state->eofAction != 0 )
+		entry = state->eofAction->actListId+1;
+	return out << entry;
+}
+
+/* Write the action table entry for a transition: the action list id plus
+ * one, or 0 when the transition has no action. */
+std::ostream &FFlatCodeGen::TRANS_ACTION( RedTransAp *trans )
+{
+	int entry = 0;
+	if ( trans->action != 0 )
+		entry = trans->action->actListId+1;
+	return out << entry;
+}
+
+/* Write the case bodies of the to-state action switch: one case per action
+ * list that has to-state references, labelled with the list id plus one
+ * (the value TO_STATE_ACTION writes into the table). */
+std::ostream &FFlatCodeGen::TO_STATE_ACTION_SWITCH()
+{
+	for ( ActionTableMap::Iter list = redFsm->actionMap; list.lte(); list++ ) {
+		/* Lists never used as a to-state action get no case. */
+		if ( list->numToStateRefs <= 0 )
+			continue;
+
+		out << "\tcase " << list->actListId+1 << ":\n";
+
+		/* The code of every action in the list, in order. */
+		for ( ActionTable::Iter el = list->key; el.lte(); el++ )
+			ACTION( out, el->value, 0, false );
+
+		out << "\tbreak;\n";
+	}
+
+	genLineDirective( out );
+	return out;
+}
+
+/* Write the case bodies of the from-state action switch: one case per action
+ * list that has from-state references, labelled with the list id plus one
+ * (the value FROM_STATE_ACTION writes into the table). */
+std::ostream &FFlatCodeGen::FROM_STATE_ACTION_SWITCH()
+{
+	for ( ActionTableMap::Iter list = redFsm->actionMap; list.lte(); list++ ) {
+		/* Lists never used as a from-state action get no case. */
+		if ( list->numFromStateRefs <= 0 )
+			continue;
+
+		out << "\tcase " << list->actListId+1 << ":\n";
+
+		/* The code of every action in the list, in order. */
+		for ( ActionTable::Iter el = list->key; el.lte(); el++ )
+			ACTION( out, el->value, 0, false );
+
+		out << "\tbreak;\n";
+	}
+
+	genLineDirective( out );
+	return out;
+}
+
+/* Write the case bodies of the EOF action switch: one case per action list
+ * that has EOF references, labelled with the list id plus one (the value
+ * EOF_ACTION writes into the table). */
+std::ostream &FFlatCodeGen::EOF_ACTION_SWITCH()
+{
+	for ( ActionTableMap::Iter list = redFsm->actionMap; list.lte(); list++ ) {
+		/* Lists never used as an EOF action get no case. */
+		if ( list->numEofRefs <= 0 )
+			continue;
+
+		out << "\tcase " << list->actListId+1 << ":\n";
+
+		/* The code of every action in the list, in order. This is the only
+		 * switch that passes true as ACTION's last argument. */
+		for ( ActionTable::Iter el = list->key; el.lte(); el++ )
+			ACTION( out, el->value, 0, true );
+
+		out << "\tbreak;\n";
+	}
+
+	genLineDirective( out );
+	return out;
+}
+
+/* Write the case bodies of the transition action switch: one case per action
+ * list that has transition references, labelled with the list id plus one
+ * (the value TRANS_ACTION writes into the table). */
+std::ostream &FFlatCodeGen::ACTION_SWITCH()
+{
+	for ( ActionTableMap::Iter list = redFsm->actionMap; list.lte(); list++ ) {
+		/* Lists never used on a transition get no case. */
+		if ( list->numTransRefs <= 0 )
+			continue;
+
+		out << "\tcase " << list->actListId+1 << ":\n";
+
+		/* The code of every action in the list, in order. */
+		for ( ActionTable::Iter el = list->key; el.lte(); el++ )
+			ACTION( out, el->value, 0, false );
+
+		out << "\tbreak;\n";
+	}
+
+	genLineDirective( out );
+	return out;
+}
+
+/* Write the static data of the machine: the condition arrays (only when the
+ * machine has conditions), the key, span, index-offset, index and
+ * transition-target arrays, the action arrays that are in use, and the
+ * start / first-final / error state constants. */
+void FFlatCodeGen::writeOutData()
+{
+ /* Condition tables. */
+ if ( anyConditions() ) {
+ OPEN_ARRAY( WIDE_ALPH_TYPE(), CK() );
+ COND_KEYS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxCondSpan), CSP() );
+ COND_KEY_SPANS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxCond), C() );
+ CONDS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxCondIndexOffset), CO() );
+ COND_INDEX_OFFSET();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ /* Transition lookup tables; these are always written. */
+ OPEN_ARRAY( WIDE_ALPH_TYPE(), K() );
+ KEYS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxSpan), SP() );
+ KEY_SPANS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxFlatIndexOffset), IO() );
+ FLAT_INDEX_OFFSET();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxIndex), I() );
+ INDICIES();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxState), TT() );
+ TRANS_TARGS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ /* Action tables, each written only when that kind of action is used. */
+ if ( anyActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActListId), TA() );
+ TRANS_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ /* NOTE(review): in this class TO_STATE_ACTION and FROM_STATE_ACTION write
+ * actListId+1, yet the element type of the two arrays below is sized from
+ * maxActionLoc, while the transition and EOF arrays use maxActListId --
+ * confirm that maxActionLoc is always large enough here. */
+ if ( anyToStateActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), TSA() );
+ TO_STATE_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ if ( anyFromStateActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), FSA() );
+ FROM_STATE_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ if ( anyEofActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActListId), EA() );
+ EOF_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ /* State constants. The start state is always written; the other two only
+ * when cgd asks for them. */
+ out <<
+ "static const int " << START() << " = " << START_STATE_ID() << ";\n"
+ "\n";
+
+ if ( cgd->writeFirstFinal ) {
+ out <<
+ "static const int " << FIRST_FINAL() << " = " << FIRST_FINAL_STATE() << ";\n"
+ "\n";
+ }
+
+ if ( cgd->writeErr ) {
+ out <<
+ "static const int " << ERROR() << " = " << ERROR_STATE() << ";\n"
+ "\n";
+ }
+}
+
+/* Write the execution loop of the machine. The emitted code declares its
+ * locals, then from the _resume label: leaves if the current state is the
+ * error state, runs from-state actions, translates conditions, locates the
+ * transition, moves to the target state and runs the transition's actions;
+ * from the _again label it runs to-state actions, advances the input pointer
+ * and jumps back to _resume. The _out label is only emitted if some jump to
+ * it was generated. */
+void FFlatCodeGen::writeOutExec()
+{
+ /* Set whenever a "goto _out" is emitted, so the label is written last. */
+ outLabelUsed = false;
+
+ /* Local declarations of the emitted block. */
+ out <<
+ " {\n"
+ " int _slen";
+
+ if ( anyRegCurStateRef() )
+ out << ", _ps";
+
+ out << ";\n";
+ out << " int _trans";
+
+ if ( anyConditions() )
+ out << ", _cond";
+
+ out << ";\n";
+
+ out <<
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_keys;\n"
+ " " << PTR_CONST() << ARRAY_TYPE(maxIndex) << POINTER() << "_inds;\n";
+
+ if ( anyConditions() ) {
+ out <<
+ " " << PTR_CONST() << ARRAY_TYPE(maxCond) << POINTER() << "_conds;\n"
+ " " << WIDE_ALPH_TYPE() << " _widec;\n";
+ }
+
+ /* With an end pointer, empty input skips the loop entirely. */
+ if ( cgd->hasEnd ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+
+ out << "_resume:\n";
+
+ /* Stop as soon as the machine is in the error state, if it has one. */
+ if ( redFsm->errState != 0 ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << CS() << " == " << redFsm->errState->id << " )\n"
+ " goto _out;\n";
+ }
+
+ if ( anyFromStateActions() ) {
+ out <<
+ " switch ( " << FSA() << "[" << CS() << "] ) {\n";
+ FROM_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ }
+
+ if ( anyConditions() )
+ COND_TRANSLATE();
+
+ LOCATE_TRANS();
+
+ /* Keep the previous state when regular actions refer to it. */
+ if ( anyRegCurStateRef() )
+ out << " _ps = " << CS() << ";\n";
+
+ out <<
+ " " << CS() << " = " << TT() << "[_trans];\n\n";
+
+ /* An entry of 0 in the transition action table means "no action", so the
+ * switch is skipped for it. */
+ if ( anyRegActions() ) {
+ out <<
+ " if ( " << TA() << "[_trans] == 0 )\n"
+ " goto _again;\n"
+ "\n"
+ " switch ( " << TA() << "[_trans] ) {\n";
+ ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ }
+
+ /* The label is written when the block above jumps to it, or when actions
+ * contain gotos, calls or returns. */
+ if ( anyRegActions() || anyActionGotos() || anyActionCalls() || anyActionRets() )
+ out << "_again:\n";
+
+ if ( anyToStateActions() ) {
+ out <<
+ " switch ( " << TSA() << "[" << CS() << "] ) {\n";
+ TO_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ }
+
+ /* Advance the input pointer. Without an end pointer the emitted loop has
+ * no end-of-input test of its own. */
+ if ( cgd->hasEnd ) {
+ out <<
+ " if ( ++" << P() << " != " << PE() << " )\n"
+ " goto _resume;\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n"
+ " goto _resume;\n";
+ }
+
+ if ( outLabelUsed )
+ out << " _out: {}\n";
+
+ out << " }\n";
+}
+
+/* Write the EOF handling code: a switch on the EOF action table entry of the
+ * current state. Nothing is written for machines with no EOF actions. */
+void FFlatCodeGen::writeOutEOF()
+{
+	if ( !anyEofActions() )
+		return;
+
+	/* Open the block and the switch. */
+	out <<
+		" {\n"
+		" switch ( " << EA() << "[" << CS() << "] ) {\n";
+
+	EOF_ACTION_SWITCH();
+
+	/* Default case, then close the switch and the block. */
+	SWITCH_DEFAULT() <<
+		" }\n"
+		" }\n"
+		"\n";
+}
diff --git a/rlcodegen/fflatcodegen.h b/rlcodegen/fflatcodegen.h
new file mode 100644
index 0000000..7cfbd66
--- /dev/null
+++ b/rlcodegen/fflatcodegen.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2004-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Eric Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _FFLATCODEGEN_H
+#define _FFLATCODEGEN_H
+
+#include <iostream>
+#include "flatcodegen.h"
+
+/* Forwards. */
+struct CodeGenData;
+
+/*
+ * FFlatCodeGen
+ *
+ * Specialisation of FlatCodeGen. It redeclares the four action-switch
+ * emitters, overrides the per-state and per-transition action emitters,
+ * and overrides the three writeOut* entry points.
+ */
+class FFlatCodeGen : public FlatCodeGen
+{
+protected:
+ /* Switch-case emitters; declared without `virtual` here. */
+ std::ostream &TO_STATE_ACTION_SWITCH();
+ std::ostream &FROM_STATE_ACTION_SWITCH();
+ std::ostream &EOF_ACTION_SWITCH();
+ std::ostream &ACTION_SWITCH();
+
+ /* Single table-entry emitters, one state or transition at a time. */
+ virtual std::ostream &TO_STATE_ACTION( RedStateAp *state );
+ virtual std::ostream &FROM_STATE_ACTION( RedStateAp *state );
+ virtual std::ostream &EOF_ACTION( RedStateAp *state );
+ virtual std::ostream &TRANS_ACTION( RedTransAp *trans );
+
+ /* Top-level writers for the data tables, EOF code and exec loop. */
+ virtual void writeOutData();
+ virtual void writeOutEOF();
+ virtual void writeOutExec();
+};
+
+/*
+ * CFFlatCodeGen
+ *
+ * Joins FFlatCodeGen with CCodeGen; adds no members of its own.
+ * NOTE(review): presumably the C-output flavour -- confirm against CCodeGen.
+ */
+struct CFFlatCodeGen
+ : public FFlatCodeGen, public CCodeGen
+{
+};
+
+/*
+ * DFFlatCodeGen
+ *
+ * Joins FFlatCodeGen with DCodeGen; adds no members of its own.
+ * NOTE(review): presumably the D-output flavour -- confirm against DCodeGen.
+ */
+struct DFFlatCodeGen
+ : public FFlatCodeGen, public DCodeGen
+{
+};
+
+#endif /* _FFLATCODEGEN_H */
diff --git a/rlcodegen/fgotocodegen.cpp b/rlcodegen/fgotocodegen.cpp
new file mode 100644
index 0000000..34ef47c
--- /dev/null
+++ b/rlcodegen/fgotocodegen.cpp
@@ -0,0 +1,276 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Eric Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlcodegen.h"
+#include "fgotocodegen.h"
+#include "redfsm.h"
+#include "gendata.h"
+#include "bstmap.h"
+
+/* Emit one labelled block ("f<actListId>:") for every action list that is
+ * referenced by at least one transition. Each block holds the code of the
+ * list's actions in order, then a jump to _again. */
+std::ostream &FGotoCodeGen::EXEC_ACTIONS()
+{
+    for ( ActionTableMap::Iter actList = redFsm->actionMap; actList.lte(); actList++ ) {
+        /* Lists no transition refers to get no label. */
+        if ( actList->numTransRefs <= 0 )
+            continue;
+
+        /* Label that opens the block. */
+        out << "f" << actList->actListId << ":\n";
+
+        /* The actions of the list, in order. */
+        for ( ActionTable::Iter item = actList->key; item.lte(); item++ )
+            ACTION( out, item->value, 0, false );
+
+        out << "\tgoto _again;\n";
+    }
+    return out;
+}
+
+/* Emit the switch cases for to-state action lists. Every action list with
+ * at least one to-state reference gets "case <actListId+1>:", the code of
+ * each of its actions in order, and a closing break. genLineDirective()
+ * is called on the stream after the last case. */
+std::ostream &FGotoCodeGen::TO_STATE_ACTION_SWITCH()
+{
+    for ( ActionTableMap::Iter actList = redFsm->actionMap; actList.lte(); actList++ ) {
+        /* Lists never used as a to-state action get no case. */
+        if ( actList->numToStateRefs <= 0 )
+            continue;
+
+        /* Case label: the list id shifted up by one. */
+        out << "\tcase " << actList->actListId+1 << ":\n";
+
+        /* Case body: each action of the list, in order. */
+        for ( ActionTable::Iter item = actList->key; item.lte(); item++ )
+            ACTION( out, item->value, 0, false );
+
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Emit the switch cases for from-state action lists. Every action list
+ * with at least one from-state reference gets "case <actListId+1>:", the
+ * code of each of its actions in order, and a closing break.
+ * genLineDirective() is called on the stream after the last case. */
+std::ostream &FGotoCodeGen::FROM_STATE_ACTION_SWITCH()
+{
+    for ( ActionTableMap::Iter actList = redFsm->actionMap; actList.lte(); actList++ ) {
+        /* Lists never used as a from-state action get no case. */
+        if ( actList->numFromStateRefs <= 0 )
+            continue;
+
+        /* Case label: the list id shifted up by one. */
+        out << "\tcase " << actList->actListId+1 << ":\n";
+
+        /* Case body: each action of the list, in order. */
+        for ( ActionTable::Iter item = actList->key; item.lte(); item++ )
+            ACTION( out, item->value, 0, false );
+
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Emit the switch cases for EOF action lists. Every action list with at
+ * least one EOF reference gets "case <actListId+1>:", the code of each of
+ * its actions in order (ACTION is called with its last argument true), and
+ * a closing break. genLineDirective() is called after the last case. */
+std::ostream &FGotoCodeGen::EOF_ACTION_SWITCH()
+{
+    for ( ActionTableMap::Iter actList = redFsm->actionMap; actList.lte(); actList++ ) {
+        /* Lists never used as an EOF action get no case. */
+        if ( actList->numEofRefs <= 0 )
+            continue;
+
+        /* Case label: the list id shifted up by one. */
+        out << "\tcase " << actList->actListId+1 << ":\n";
+
+        /* Case body: each action of the list, in order. */
+        for ( ActionTable::Iter item = actList->key; item.lte(); item++ )
+            ACTION( out, item->value, 0, true );
+
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+
+/* For every state that has an EOF action, emit
+ * "case <state id>: goto f<actListId>;" so the state's EOF action list is
+ * reached by a jump to its label. */
+std::ostream &FGotoCodeGen::FINISH_CASES()
+{
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* States without an EOF action need no case. */
+        if ( st->eofAction == 0 )
+            continue;
+
+        /* The case label ... */
+        out << "\t\tcase " << st->id << ": ";
+
+        /* ... and the jump to the action list's label. */
+        out << "goto f" << st->eofAction->actListId << ";\n";
+    }
+
+    return out;
+}
+
+/* Table value for a state's to-state action: the action list id plus one,
+ * or zero when the state has no to-state action. */
+unsigned int FGotoCodeGen::TO_STATE_ACTION( RedStateAp *state )
+{
+    if ( state->toStateAction == 0 )
+        return 0;
+    return state->toStateAction->actListId+1;
+}
+
+/* Table value for a state's from-state action: the action list id plus
+ * one, or zero when the state has no from-state action. */
+unsigned int FGotoCodeGen::FROM_STATE_ACTION( RedStateAp *state )
+{
+    if ( state->fromStateAction == 0 )
+        return 0;
+    return state->fromStateAction->actListId+1;
+}
+
+/* Table value for a state's EOF action: the action list id plus one, or
+ * zero when the state has no EOF action. */
+unsigned int FGotoCodeGen::EOF_ACTION( RedStateAp *state )
+{
+    if ( state->eofAction == 0 )
+        return 0;
+    return state->eofAction->actListId+1;
+}
+
+/* Emit the static data for this code style: the start-state constant,
+ * the optional first-final and error constants, and one array per kind of
+ * state action (to-state, from-state, EOF) that is actually in use. */
+void FGotoCodeGen::writeOutData()
+{
+ /* The start state constant is always written. */
+ out <<
+ "static const int " << START() << " = " << START_STATE_ID() << ";\n"
+ "\n";
+
+ /* First-final constant, only when cgd->writeFirstFinal is set. */
+ if ( cgd->writeFirstFinal ) {
+ out <<
+ "static const int " << FIRST_FINAL() << " = " << FIRST_FINAL_STATE() << ";\n"
+ "\n";
+ }
+
+ /* Error-state constant, only when cgd->writeErr is set. */
+ if ( cgd->writeErr ) {
+ out <<
+ "static const int " << ERROR() << " = " << ERROR_STATE() << ";\n"
+ "\n";
+ }
+
+ /* Per-state action arrays; each is skipped when unused. */
+ if ( anyToStateActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), TSA() );
+ TO_STATE_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ if ( anyFromStateActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), FSA() );
+ FROM_STATE_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ if ( anyEofActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), EA() );
+ EOF_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+}
+
+/* Emit the execution loop as one braced block. In order: optional local
+ * declarations, an optional "p == pe" early exit, the _resume label, the
+ * from-state action switch, the state switch and transition code, the
+ * labelled action blocks, the _again label, the to-state action switch,
+ * and the advance-and-loop step. The _out label is written last, and only
+ * when outLabelUsed was set along the way. */
+void FGotoCodeGen::writeOutExec()
+{
+ outLabelUsed = false;
+
+ out << " {\n";
+
+ /* _ps is declared only when anyRegCurStateRef() reports a use. */
+ if ( anyRegCurStateRef() )
+ out << " int _ps = 0;\n";
+
+ if ( anyConditions() )
+ out << " " << WIDE_ALPH_TYPE() << " _widec;\n";
+
+ /* With an end pointer, bail out at once on empty input. */
+ if ( cgd->hasEnd ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+
+ out << "_resume:\n";
+
+ if ( anyFromStateActions() ) {
+ out <<
+ " switch ( " << FSA() << "[" << CS() << "] ) {\n";
+ FROM_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ }
+
+ /* Dispatch on the current state, then write the transition code. */
+ out <<
+ " switch ( " << CS() << " ) {\n";
+ STATE_GOTOS();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ TRANSITIONS() <<
+ "\n";
+
+ if ( anyRegActions() )
+ EXEC_ACTIONS() << "\n";
+
+ /* Unlike the flat style in this file, _again is written unconditionally. */
+ out << "_again:\n";
+
+ if ( anyToStateActions() ) {
+ out <<
+ " switch ( " << TSA() << "[" << CS() << "] ) {\n";
+ TO_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ }
+
+ /* Advance the input pointer; loop until the end when one is known,
+ * otherwise loop unconditionally. */
+ if ( cgd->hasEnd ) {
+ out <<
+ " if ( ++" << P() << " != " << PE() << " )\n"
+ " goto _resume;\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n"
+ " goto _resume;\n";
+ }
+
+
+ if ( outLabelUsed )
+ out << " _out: {}\n";
+
+ out << " }\n";
+}
+
+/* Emit the end-of-input action dispatch. Nothing is written unless
+ * anyEofActions() holds. Otherwise the output is one braced block holding
+ * a switch on EA()[CS()], whose cases come from EOF_ACTION_SWITCH() and
+ * are followed by whatever SWITCH_DEFAULT() writes. */
+void FGotoCodeGen::writeOutEOF()
+{
+ if ( anyEofActions() ) {
+ out <<
+ " {\n"
+ " switch ( " << EA() << "[" << CS() << "] ) {\n";
+ EOF_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+}
diff --git a/rlcodegen/fgotocodegen.h b/rlcodegen/fgotocodegen.h
new file mode 100644
index 0000000..e971877
--- /dev/null
+++ b/rlcodegen/fgotocodegen.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Eric Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _FGOTOCODEGEN_H
+#define _FGOTOCODEGEN_H
+
+#include <iostream>
+#include "gotocodegen.h"
+
+/* Forwards. */
+struct CodeGenData;
+
+
+/*
+ * class FGotoCodeGen
+ *
+ * Specialisation of GotoCodeGen. It declares the labelled action-block and
+ * action-switch emitters, the per-state action value functions, and
+ * overrides the three writeOut* entry points.
+ */
+class FGotoCodeGen : public GotoCodeGen
+{
+public:
+ /* Emitters that write generated code to the output stream. */
+ std::ostream &EXEC_ACTIONS();
+ std::ostream &TO_STATE_ACTION_SWITCH();
+ std::ostream &FROM_STATE_ACTION_SWITCH();
+ std::ostream &FINISH_CASES();
+ std::ostream &EOF_ACTION_SWITCH();
+ /* Per-state table values: action list id plus one, or zero for none. */
+ unsigned int TO_STATE_ACTION( RedStateAp *state );
+ unsigned int FROM_STATE_ACTION( RedStateAp *state );
+ unsigned int EOF_ACTION( RedStateAp *state );
+
+ /* Top-level writers for the data tables, EOF code and exec loop. */
+ virtual void writeOutData();
+ virtual void writeOutEOF();
+ virtual void writeOutExec();
+};
+
+/*
+ * class CFGotoCodeGen
+ *
+ * Joins FGotoCodeGen with CCodeGen; adds no members of its own.
+ * NOTE(review): presumably the C-output flavour -- confirm against CCodeGen.
+ */
+struct CFGotoCodeGen
+ : public FGotoCodeGen, public CCodeGen
+{
+};
+
+/*
+ * class DFGotoCodeGen
+ *
+ * Joins FGotoCodeGen with DCodeGen; adds no members of its own.
+ * NOTE(review): presumably the D-output flavour -- confirm against DCodeGen.
+ */
+struct DFGotoCodeGen
+ : public FGotoCodeGen, public DCodeGen
+{
+};
+
+#endif /* _FGOTOCODEGEN_H */
diff --git a/rlcodegen/flatcodegen.cpp b/rlcodegen/flatcodegen.cpp
new file mode 100644
index 0000000..d5e96d3
--- /dev/null
+++ b/rlcodegen/flatcodegen.cpp
@@ -0,0 +1,777 @@
+/*
+ * Copyright 2004-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Eric Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlcodegen.h"
+#include "flatcodegen.h"
+#include "redfsm.h"
+#include "gendata.h"
+
+/* Print the to-state action entry for one state: the action's location
+ * plus one, or zero when the state has no to-state action. */
+std::ostream &FlatCodeGen::TO_STATE_ACTION( RedStateAp *state )
+{
+    const int entry = state->toStateAction != 0 ?
+            state->toStateAction->location+1 : 0;
+    out << entry;
+    return out;
+}
+
+/* Print the from-state action entry for one state: the action's location
+ * plus one, or zero when the state has no from-state action. */
+std::ostream &FlatCodeGen::FROM_STATE_ACTION( RedStateAp *state )
+{
+    const int entry = state->fromStateAction != 0 ?
+            state->fromStateAction->location+1 : 0;
+    out << entry;
+    return out;
+}
+
+/* Print the EOF action entry for one state: the action's location plus
+ * one, or zero when the state has no EOF action. */
+std::ostream &FlatCodeGen::EOF_ACTION( RedStateAp *state )
+{
+    const int entry = state->eofAction != 0 ?
+            state->eofAction->location+1 : 0;
+    out << entry;
+    return out;
+}
+
+/* Print the action entry for one transition: the action's location plus
+ * one, or zero when the transition carries no action. */
+std::ostream &FlatCodeGen::TRANS_ACTION( RedTransAp *trans )
+{
+    const int entry = trans->action != 0 ?
+            trans->action->location+1 : 0;
+    out << entry;
+    return out;
+}
+
+/* Emit one switch case per action that has at least one to-state
+ * reference: "case <actionId>:", the action's code, then a break.
+ * genLineDirective() is called on the stream after the last case. */
+std::ostream &FlatCodeGen::TO_STATE_ACTION_SWITCH()
+{
+    for ( ActionList::Iter action = cgd->actionList; action.lte(); action++ ) {
+        /* Actions never used as a to-state action get no case. */
+        if ( action->numToStateRefs <= 0 )
+            continue;
+
+        out << "\tcase " << action->actionId << ":\n";
+        ACTION( out, action, 0, false );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Emit one switch case per action that has at least one from-state
+ * reference: "case <actionId>:", the action's code, then a break.
+ * genLineDirective() is called on the stream after the last case. */
+std::ostream &FlatCodeGen::FROM_STATE_ACTION_SWITCH()
+{
+    for ( ActionList::Iter action = cgd->actionList; action.lte(); action++ ) {
+        /* Actions never used as a from-state action get no case. */
+        if ( action->numFromStateRefs <= 0 )
+            continue;
+
+        out << "\tcase " << action->actionId << ":\n";
+        ACTION( out, action, 0, false );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Emit one switch case per action that has at least one EOF reference:
+ * "case <actionId>:", the action's code (ACTION is called with its last
+ * argument true), then a break. genLineDirective() is called on the
+ * stream after the last case. */
+std::ostream &FlatCodeGen::EOF_ACTION_SWITCH()
+{
+    for ( ActionList::Iter action = cgd->actionList; action.lte(); action++ ) {
+        /* Actions never used as an EOF action get no case. */
+        if ( action->numEofRefs <= 0 )
+            continue;
+
+        out << "\tcase " << action->actionId << ":\n";
+        ACTION( out, action, 0, true );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+
+/* Emit one switch case per action that has at least one transition
+ * reference: "case <actionId>:", the action's code, then a break.
+ * genLineDirective() is called on the stream after the last case. */
+std::ostream &FlatCodeGen::ACTION_SWITCH()
+{
+    for ( ActionList::Iter action = cgd->actionList; action.lte(); action++ ) {
+        /* Actions no transition refers to get no case. */
+        if ( action->numTransRefs <= 0 )
+            continue;
+
+        out << "\tcase " << action->actionId << ":\n";
+        ACTION( out, action, 0, false );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+
+/* Emit, for every state, the offset of its first entry in the flat index
+ * table. A state contributes span(lowKey, highKey) entries when it has a
+ * transition list, plus one more when it has a default transition, so the
+ * offset written for a state is the running total over all earlier states.
+ *
+ * The running total is kept in an unsigned long long, the same type the
+ * sibling emitters in this file use to hold keyOps->span() results, so
+ * that summing spans cannot be narrowed to int on the way. Values are
+ * comma separated, with a line break after every IALL separators. */
+std::ostream &FlatCodeGen::FLAT_INDEX_OFFSET()
+{
+    out << "\t";
+    int totalStateNum = 0;
+    unsigned long long curIndOffset = 0;
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Write the index offset. */
+        out << curIndOffset;
+        if ( !st.last() ) {
+            out << ", ";
+            if ( ++totalStateNum % IALL == 0 )
+                out << "\n\t";
+        }
+
+        /* Move the index offset ahead by this state's table entries. */
+        if ( st->transList != 0 )
+            curIndOffset += keyOps->span( st->lowKey, st->highKey );
+
+        /* The default transition takes one further slot. */
+        if ( st->defTrans != 0 )
+            curIndOffset += 1;
+    }
+    out << "\n";
+    return out;
+}
+
+/* Emit, for every state, the width of its key range: span(lowKey, highKey)
+ * when the state has a transition list, zero otherwise. Values are comma
+ * separated, with a line break after every IALL separators. */
+std::ostream &FlatCodeGen::KEY_SPANS()
+{
+    int written = 0;
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Width of this state's key range. */
+        unsigned long long width = 0;
+        if ( st->transList != 0 )
+            width = keyOps->span( st->lowKey, st->highKey );
+        out << width;
+
+        /* No separator after the final state. */
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        written += 1;
+        if ( written % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+/* Emit the to-state action table: one TO_STATE_ACTION() entry per state,
+ * comma separated, with a line break after every IALL separators. */
+std::ostream &FlatCodeGen::TO_STATE_ACTIONS()
+{
+    int written = 0;
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* This state's to-state action entry. */
+        TO_STATE_ACTION(st);
+
+        /* No separator after the final state. */
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        written += 1;
+        if ( written % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+/* Emit the from-state action table: one FROM_STATE_ACTION() entry per
+ * state, comma separated, with a line break after every IALL separators. */
+std::ostream &FlatCodeGen::FROM_STATE_ACTIONS()
+{
+    int written = 0;
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* This state's from-state action entry. */
+        FROM_STATE_ACTION(st);
+
+        /* No separator after the final state. */
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        written += 1;
+        if ( written % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+/* Emit the EOF action table: one EOF_ACTION() entry per state, comma
+ * separated, with a line break after every IALL separators. */
+std::ostream &FlatCodeGen::EOF_ACTIONS()
+{
+    int written = 0;
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* This state's EOF action entry. */
+        EOF_ACTION(st);
+
+        /* No separator after the final state. */
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        written += 1;
+        if ( written % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+/* Emit the condition key table: for every state its condLowKey and
+ * condHighKey, each followed by a comma. A line break follows every IALL
+ * states, and a final 0 closes the list so no trailing comma is left. */
+std::ostream &FlatCodeGen::COND_KEYS()
+{
+    int statesDone = 0;
+    out << '\t';
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Low bound first, then high bound. */
+        out << KEY( st->condLowKey ) << ", ";
+        out << KEY( st->condHighKey ) << ", ";
+
+        statesDone += 1;
+        if ( statesDone % IALL == 0 )
+            out << "\n\t";
+    }
+
+    /* Terminating entry; saves tracking which pair was the last one. */
+    out << 0 << "\n";
+    return out;
+}
+
+/* Emit, for every state, the width of its condition key range:
+ * span(condLowKey, condHighKey) when the state has a condition list, zero
+ * otherwise. Values are comma separated, with a line break after every
+ * IALL separators. */
+std::ostream &FlatCodeGen::COND_KEY_SPANS()
+{
+    int written = 0;
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Width of this state's condition key range. */
+        unsigned long long width = 0;
+        if ( st->condList != 0 )
+            width = keyOps->span( st->condLowKey, st->condHighKey );
+        out << width;
+
+        /* No separator after the final state. */
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        written += 1;
+        if ( written % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+/* Emit the flat condition table. For every state with a condition list,
+ * one entry is written per position in span(condLowKey, condHighKey): the
+ * entry's condSpaceId plus one, or 0 where the slot is empty. Each entry
+ * is followed by a comma, a line break follows every IALL entries, and a
+ * final 0 closes the list. */
+std::ostream &FlatCodeGen::CONDS()
+{
+    int entries = 0;
+    out << '\t';
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* States without a condition list contribute nothing. */
+        if ( st->condList == 0 )
+            continue;
+
+        /* Walk every slot in the state's condition key range. */
+        unsigned long long width = keyOps->span( st->condLowKey, st->condHighKey );
+        for ( unsigned long long slot = 0; slot < width; slot++ ) {
+            if ( st->condList[slot] == 0 )
+                out << "0, ";
+            else
+                out << st->condList[slot]->condSpaceId + 1 << ", ";
+
+            entries += 1;
+            if ( entries % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+
+    /* Terminating entry; saves tracking which slot was the last one. */
+    out << 0 << "\n";
+    return out;
+}
+
+/* Emit, for every state, the offset of its first entry in the flat
+ * condition table. A state contributes span(condLowKey, condHighKey)
+ * entries when it has a condition list, so the offset written for a state
+ * is the running total over all earlier states.
+ *
+ * The running total is kept in an unsigned long long, the same type the
+ * sibling emitters in this file use to hold keyOps->span() results, so
+ * that summing spans cannot be narrowed to int on the way. Values are
+ * comma separated, with a line break after every IALL separators. */
+std::ostream &FlatCodeGen::COND_INDEX_OFFSET()
+{
+    out << "\t";
+    int totalStateNum = 0;
+    unsigned long long curIndOffset = 0;
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Write the index offset. */
+        out << curIndOffset;
+        if ( !st.last() ) {
+            out << ", ";
+            if ( ++totalStateNum % IALL == 0 )
+                out << "\n\t";
+        }
+
+        /* Move the index offset ahead by this state's table entries. */
+        if ( st->condList != 0 )
+            curIndOffset += keyOps->span( st->condLowKey, st->condHighKey );
+    }
+    out << "\n";
+    return out;
+}
+
+
+/* Emit the key table: for every state its lowKey and highKey, each
+ * followed by a comma. A line break follows every IALL states, and a
+ * final 0 closes the list so no trailing comma is left. */
+std::ostream &FlatCodeGen::KEYS()
+{
+    int statesDone = 0;
+    out << '\t';
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Low bound first, then high bound. */
+        out << KEY( st->lowKey ) << ", ";
+        out << KEY( st->highKey ) << ", ";
+
+        statesDone += 1;
+        if ( statesDone % IALL == 0 )
+            out << "\n\t";
+    }
+
+    /* Terminating entry; saves tracking which pair was the last one. */
+    out << 0 << "\n";
+    return out;
+}
+
+/* Emit the flat index table. For every state: the id of the transition at
+ * each position of span(lowKey, highKey) when the state has a transition
+ * list, then the id of the default transition when there is one. Each id
+ * is followed by a comma and a final 0 closes the list.
+ *
+ * The line-wrap counter advances once per ranged entry and once more per
+ * state, whether or not that state wrote a default transition. */
+std::ostream &FlatCodeGen::INDICIES()
+{
+    int wrapCount = 0;
+    out << '\t';
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        if ( st->transList != 0 ) {
+            /* One entry per key in the state's range. */
+            unsigned long long width = keyOps->span( st->lowKey, st->highKey );
+            unsigned long long slot = 0;
+            while ( slot < width ) {
+                out << st->transList[slot]->id << ", ";
+                wrapCount += 1;
+                if ( wrapCount % IALL == 0 )
+                    out << "\n\t";
+                slot++;
+            }
+        }
+
+        /* The state's default transition follows its ranged entries. */
+        if ( st->defTrans != 0 )
+            out << st->defTrans->id << ", ";
+
+        /* Counted once per state regardless of the default transition. */
+        wrapCount += 1;
+        if ( wrapCount % IALL == 0 )
+            out << "\n\t";
+    }
+
+    /* Terminating entry; saves tracking which id was the last one. */
+    out << 0 << "\n";
+    return out;
+}
+
+/* Emit the transition target table: the target state id of every
+ * transition, ordered by transition id. Values are comma separated, with
+ * a line break after every IALL separators. */
+std::ostream &FlatCodeGen::TRANS_TARGS()
+{
+    /* Build a table of the transitions indexed by their id, so they can
+     * be written in id order. */
+    RedTransAp **byId = new RedTransAp*[redFsm->transSet.length()];
+    for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ )
+        byId[trans->id] = trans;
+
+    int written = 0;
+    out << '\t';
+    for ( int t = 0; t < redFsm->transSet.length(); t++ ) {
+        /* The state this transition leads to. */
+        out << byId[t]->targ->id;
+
+        /* No separator after the final transition. */
+        if ( !( t < redFsm->transSet.length()-1 ) )
+            continue;
+
+        out << ", ";
+        written += 1;
+        if ( written % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    delete[] byId;
+    return out;
+}
+
+
+/* Emit the transition action table: one TRANS_ACTION() entry for every
+ * transition, ordered by transition id. Values are comma separated, with
+ * a line break after every IALL separators. */
+std::ostream &FlatCodeGen::TRANS_ACTIONS()
+{
+    /* Build a table of the transitions indexed by their id, so they can
+     * be written in id order. */
+    RedTransAp **byId = new RedTransAp*[redFsm->transSet.length()];
+    for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ )
+        byId[trans->id] = trans;
+
+    int written = 0;
+    out << '\t';
+    for ( int t = 0; t < redFsm->transSet.length(); t++ ) {
+        /* The action entry for this transition. */
+        TRANS_ACTION( byId[t] );
+
+        /* No separator after the final transition. */
+        if ( !( t < redFsm->transSet.length()-1 ) )
+            continue;
+
+        out << ", ";
+        written += 1;
+        if ( written % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    delete[] byId;
+    return out;
+}
+
+/* Emit the code that finds the transition for the current input key. The
+ * generated code points _keys and _inds at the current state's rows,
+ * loads the state's span into _slen, and picks _inds[key - _keys[0]] when
+ * the key lies inside [_keys[0], _keys[1]] and _slen is positive, or
+ * _inds[_slen] otherwise. */
+void FlatCodeGen::LOCATE_TRANS()
+{
+    /* Row pointers for the current state. */
+    out << " _keys = " << ARR_OFF( K(), "(" + CS() + "<<1)" ) << ";\n";
+    out << " _inds = " << ARR_OFF( I(), IO() + "[" + CS() + "]" ) << ";\n";
+    out << "\n";
+
+    /* Span lookup and the range test that selects the index. */
+    out << " _slen = " << SP() << "[" << CS() << "];\n";
+    out << " _trans = _inds[ _slen > 0 && _keys[0] <=" << GET_WIDE_KEY() << " &&\n";
+    out << " " << GET_WIDE_KEY() << " <= _keys[1] ?\n";
+    out << " " << GET_WIDE_KEY() << " - _keys[0] : _slen ];\n";
+    out << "\n";
+}
+
+/* Write a goto to a fixed state: assign gotoDest to the current state
+ * variable, then jump to _again. inFinish is not used here. */
+void FlatCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish )
+{
+    ret << "{" << CS() << " = " << gotoDest << "; ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write a goto to a computed state: assign the expression built from
+ * ilItem->children to the current state variable, then jump to _again. */
+void FlatCodeGen::GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+    /* Open the assignment; the expression text goes inside the parens. */
+    ret << "{";
+    ret << CS() << " = (";
+    INLINE_LIST( ret, ilItem->children, 0, inFinish );
+    ret << "); ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write the expression for the state the machine was in before the
+ * current transition: the parenthesised _ps variable. inFinish is not
+ * used here. */
+void FlatCodeGen::CURS( ostream &ret, bool inFinish )
+{
+ ret << "(_ps)";
+}
+
+/* Write the expression for the target state: the parenthesised current
+ * state variable. inFinish and targState are not used here. */
+void FlatCodeGen::TARGS( ostream &ret, bool inFinish, int targState )
+{
+    ret << "(";
+    ret << CS();
+    ret << ")";
+}
+
+/* Write an assignment of a fixed state to the current state variable,
+ * with no jump. inFinish is not used here. */
+void FlatCodeGen::NEXT( ostream &ret, int nextDest, bool inFinish )
+{
+    ret << CS() << " = ";
+    ret << nextDest << ";";
+}
+
+/* Write an assignment of a computed state to the current state variable,
+ * with no jump. The expression comes from ilItem->children. */
+void FlatCodeGen::NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+    /* Left-hand side and opening paren. */
+    ret << CS();
+    ret << " = (";
+    INLINE_LIST( ret, ilItem->children, 0, inFinish );
+    ret << ");";
+}
+
+/* Write a call to a fixed state: push the current state on the stack,
+ * assign callDest to the current state variable, then jump to _again.
+ * targState and inFinish are not used here. */
+void FlatCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish )
+{
+    /* Push the current state. */
+    ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; ";
+    /* Enter the callee. */
+    ret << CS() << " = " << callDest << "; ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+
+/* Write a call to a computed state: push the current state on the stack,
+ * assign the expression built from ilItem->children to the current state
+ * variable, then jump to _again. */
+void FlatCodeGen::CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish )
+{
+    /* Push the current state. */
+    ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; ";
+    /* Enter the callee given by the expression. */
+    ret << CS() << " = (";
+    INLINE_LIST( ret, ilItem->children, targState, inFinish );
+    ret << "); ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+
+/* Write a return: pop the saved state off the stack into the current
+ * state variable, then jump to _again. inFinish is not used here. */
+void FlatCodeGen::RET( ostream &ret, bool inFinish )
+{
+    ret << "{" << CS() << " = " << STACK() << "[--" << TOP() << "]; ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write a break out of the machine: a jump to _out. Marks the _out label
+ * as used so that writeOutExec() emits it. targState is not used here. */
+void FlatCodeGen::BREAK( ostream &ret, int targState )
+{
+    outLabelUsed = true;
+    ret << CTRL_FLOW();
+    ret << "goto _out;";
+}
+
+/* Emit all static data for the flat code style: the actions array, the
+ * four condition tables, the key / span / index-offset / index tables,
+ * the transition target and action tables, the three per-state action
+ * tables, and finally the start, first-final and error constants. Tables
+ * for unused features are skipped. */
+void FlatCodeGen::writeOutData()
+{
+ /* If there are any transition functions then output the array. If there
+ * are none, don't bother emitting an empty array that won't be used. */
+ if ( anyActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActArrItem), A() );
+ ACTIONS_ARRAY();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ /* Condition tables: keys, spans, the flat table and its offsets. */
+ if ( anyConditions() ) {
+ OPEN_ARRAY( WIDE_ALPH_TYPE(), CK() );
+ COND_KEYS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxCondSpan), CSP() );
+ COND_KEY_SPANS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxCond), C() );
+ CONDS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxCondIndexOffset), CO() );
+ COND_INDEX_OFFSET();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ /* Transition lookup tables; these are always written. */
+ OPEN_ARRAY( WIDE_ALPH_TYPE(), K() );
+ KEYS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxSpan), SP() );
+ KEY_SPANS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxFlatIndexOffset), IO() );
+ FLAT_INDEX_OFFSET();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxIndex), I() );
+ INDICIES();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxState), TT() );
+ TRANS_TARGS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ /* Action tables, each skipped when that kind of action is unused. */
+ if ( anyActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), TA() );
+ TRANS_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ if ( anyToStateActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), TSA() );
+ TO_STATE_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ if ( anyFromStateActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), FSA() );
+ FROM_STATE_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ if ( anyEofActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), EA() );
+ EOF_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ /* The start state constant is always written. */
+ out <<
+ "static const int " << START() << " = " << START_STATE_ID() << ";\n"
+ "\n";
+
+ /* First-final constant, only when cgd->writeFirstFinal is set. */
+ if ( cgd->writeFirstFinal ) {
+ out <<
+ "static const int " << FIRST_FINAL() << " = " << FIRST_FINAL_STATE() << ";\n"
+ "\n";
+ }
+
+ /* Error-state constant, only when cgd->writeErr is set. */
+ if ( cgd->writeErr ) {
+ out <<
+ "static const int " << ERROR() << " = " << ERROR_STATE() << ";\n"
+ "\n";
+ }
+}
+
+/* Emit the code that translates the current input key into the widened
+ * key _widec. The generated code first sets _widec to the plain key, then
+ * looks up the condition space for the key in the current state's
+ * condition row (0 when the key is outside the row's range or the row is
+ * empty) and switches on it.
+ *
+ * Each condition space gets one case. The case rebases the key onto the
+ * space's baseKey and then, for every condition in the space's set, adds
+ * (1 << position) * alphSize to _widec when that condition holds.
+ *
+ * The two nested loops use distinct iterator names; the inner one used to
+ * shadow the outer one. */
+void FlatCodeGen::COND_TRANSLATE()
+{
+    out <<
+        " _widec = " << GET_KEY() << ";\n";
+
+    /* Locate the condition space for the key in the current state. */
+    out <<
+        " _keys = " << ARR_OFF( CK(), "(" + CS() + "<<1)" ) << ";\n"
+        " _conds = " << ARR_OFF( C(), CO() + "[" + CS() + "]" ) << ";\n"
+        "\n"
+        " _slen = " << CSP() << "[" << CS() << "];\n"
+        " _cond = _slen > 0 && _keys[0] <=" << GET_WIDE_KEY() << " &&\n"
+        " " << GET_WIDE_KEY() << " <= _keys[1] ?\n"
+        " _conds[" << GET_WIDE_KEY() << " - _keys[0]] : 0;\n"
+        "\n";
+
+    out <<
+        " switch ( _cond ) {\n";
+    for ( CondSpaceList::Iter csi = cgd->condSpaceList; csi.lte(); csi++ ) {
+        CondSpace *condSpace = csi;
+
+        /* Case label is the space id plus one; zero means "no space". */
+        out << " case " << condSpace->condSpaceId + 1 << ": {\n";
+        out << TABS(2) << "_widec = " << CAST(WIDE_ALPH_TYPE()) << "(" <<
+                KEY(condSpace->baseKey) << " + (" << GET_KEY() <<
+                " - " << KEY(keyOps->minKey) << "));\n";
+
+        /* One test per condition; its set position picks the offset. */
+        for ( CondSet::Iter cond = condSpace->condSet; cond.lte(); cond++ ) {
+            out << TABS(2) << "if ( ";
+            CONDITION( out, *cond );
+            Size condValOffset = ((1 << cond.pos()) * keyOps->alphSize());
+            out << " ) _widec += " << condValOffset << ";\n";
+        }
+
+        out << " }\n";
+        out << " break;\n";
+    }
+
+    SWITCH_DEFAULT();
+
+    out <<
+        " }\n";
+}
+
+/* Emit the execution loop as one braced block. In order: local
+ * declarations, an optional "p == pe" early exit, the _resume label, an
+ * optional error-state exit, the from-state action loop, condition
+ * translation, transition lookup, the state update, the transition action
+ * loop, the _again label, the to-state action loop, and the
+ * advance-and-loop step. The _out label is written last, and only when
+ * outLabelUsed was set along the way. */
+void FlatCodeGen::writeOutExec()
+{
+ outLabelUsed = false;
+
+ /* Integer locals; _ps and _cond are added only when they are needed. */
+ out <<
+ " {\n"
+ " int _slen";
+
+ if ( anyRegCurStateRef() )
+ out << ", _ps";
+
+ out <<
+ ";\n"
+ " int _trans";
+
+ if ( anyConditions() )
+ out << ", _cond";
+ out << ";\n";
+
+ /* _acts and _nacts serve all three action loops below. */
+ if ( anyToStateActions() || anyRegActions() || anyFromStateActions() ) {
+ out <<
+ " " << PTR_CONST() << ARRAY_TYPE(maxActArrItem) << POINTER() << "_acts;\n"
+ " " << UINT() << " _nacts;\n";
+ }
+
+ out <<
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_keys;\n"
+ " " << PTR_CONST() << ARRAY_TYPE(maxIndex) << POINTER() << "_inds;\n";
+
+ if ( anyConditions() ) {
+ out <<
+ " " << PTR_CONST() << ARRAY_TYPE(maxCond) << POINTER() << "_conds;\n"
+ " " << WIDE_ALPH_TYPE() << " _widec;\n";
+ }
+
+ out << "\n";
+
+ /* With an end pointer, bail out at once on empty input. */
+ if ( cgd->hasEnd ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+
+ out << "_resume:\n";
+
+ /* When the machine has an error state, leave the loop once in it. */
+ if ( redFsm->errState != 0 ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << CS() << " == " << redFsm->errState->id << " )\n"
+ " goto _out;\n";
+ }
+
+ /* From-state actions: the first array element read is the count, the
+ * following elements are switched on one by one. */
+ if ( anyFromStateActions() ) {
+ out <<
+ " _acts = " << ARR_OFF( A(), FSA() + "[" + CS() + "]" ) << ";\n"
+ " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+ " while ( _nacts-- > 0 ) {\n"
+ " switch ( *_acts++ ) {\n";
+ FROM_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+
+ if ( anyConditions() )
+ COND_TRANSLATE();
+
+ LOCATE_TRANS();
+
+ /* Save the state being left before it is overwritten below. */
+ if ( anyRegCurStateRef() )
+ out << " _ps = " << CS() << ";\n";
+
+ out <<
+ " " << CS() << " = " << TT() << "[_trans];\n"
+ "\n";
+
+ /* Transition actions; an entry of 0 means none and skips to _again. */
+ if ( anyRegActions() ) {
+ out <<
+ " if ( " << TA() << "[_trans] == 0 )\n"
+ " goto _again;\n"
+ "\n"
+ " _acts = " << ARR_OFF( A(), TA() + "[_trans]" ) << ";\n"
+ " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+ " while ( _nacts-- > 0 ) {\n"
+ " switch ( *(_acts++) )\n {\n";
+ ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+
+ /* The label is written only when one of these four predicates holds. */
+ if ( anyRegActions() || anyActionGotos() || anyActionCalls() || anyActionRets() )
+ out << "_again:\n";
+
+ if ( anyToStateActions() ) {
+ out <<
+ " _acts = " << ARR_OFF( A(), TSA() + "[" + CS() + "]" ) << ";\n"
+ " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+ " while ( _nacts-- > 0 ) {\n"
+ " switch ( *_acts++ ) {\n";
+ TO_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+
+ /* Advance the input pointer; loop until the end when one is known,
+ * otherwise loop unconditionally. */
+ if ( cgd->hasEnd ) {
+ out <<
+ " if ( ++" << P() << " != " << PE() << " )\n"
+ " goto _resume;\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n"
+ " goto _resume;\n";
+ }
+
+ if ( outLabelUsed )
+ out << " _out: {}\n";
+
+ out << " }\n";
+}
+
+/* Emit the end-of-input action code. Nothing is written unless
+ * anyEofActions() holds. Otherwise the output is one braced block that
+ * points _acts at the current state's EOF entry in the actions array,
+ * reads the count into _nacts, and switches on each following element,
+ * with cases supplied by EOF_ACTION_SWITCH(). */
+void FlatCodeGen::writeOutEOF()
+{
+ if ( anyEofActions() ) {
+ out <<
+ " {\n"
+ " " << PTR_CONST() << ARRAY_TYPE(maxActArrItem) << POINTER() << "_acts = " <<
+ ARR_OFF( A(), EA() + "[" + CS() + "]" ) << ";\n"
+ " " << UINT() << " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+ " while ( _nacts-- > 0 ) {\n"
+ " switch ( *_acts++ ) {\n";
+ EOF_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ " }\n"
+ "\n";
+ }
+}
diff --git a/rlcodegen/flatcodegen.h b/rlcodegen/flatcodegen.h
new file mode 100644
index 0000000..a2ccb1d
--- /dev/null
+++ b/rlcodegen/flatcodegen.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2004-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Eric Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _FLATCODEGEN_H
+#define _FLATCODEGEN_H
+
+#include <iostream>
+#include "fsmcodegen.h"
+
+/* Forwards. */
+struct CodeGenData;
+struct NameInst;
+struct RedTransAp;
+struct RedStateAp;
+
+/*
+ * FlatCodeGen
+ *
+ * Code generator for the flat table style, virtually derived from
+ * FsmCodeGen. It declares the table emitters, the condition-table
+ * emitters, the inline control-flow statement writers, the overridable
+ * per-entry action emitters and the three writeOut* entry points.
+ */
+class FlatCodeGen : virtual public FsmCodeGen
+{
+public:
+ virtual ~FlatCodeGen() { }
+
+protected:
+ /* Switch-case and table emitters; each writes to the output stream. */
+ std::ostream &TO_STATE_ACTION_SWITCH();
+ std::ostream &FROM_STATE_ACTION_SWITCH();
+ std::ostream &EOF_ACTION_SWITCH();
+ std::ostream &ACTION_SWITCH();
+ std::ostream &KEYS();
+ std::ostream &INDICIES();
+ std::ostream &FLAT_INDEX_OFFSET();
+ std::ostream &KEY_SPANS();
+ std::ostream &TO_STATE_ACTIONS();
+ std::ostream &FROM_STATE_ACTIONS();
+ std::ostream &EOF_ACTIONS();
+ std::ostream &TRANS_TARGS();
+ std::ostream &TRANS_ACTIONS();
+ /* Writes the generated code that looks up the transition for a key. */
+ void LOCATE_TRANS();
+
+ /* Condition tables and the generated key-widening code. */
+ std::ostream &COND_INDEX_OFFSET();
+ void COND_TRANSLATE();
+ std::ostream &CONDS();
+ std::ostream &COND_KEYS();
+ std::ostream &COND_KEY_SPANS();
+
+ /* Writers for control-flow statements; each appends to `ret`. */
+ void GOTO( ostream &ret, int gotoDest, bool inFinish );
+ void CALL( ostream &ret, int callDest, int targState, bool inFinish );
+ void NEXT( ostream &ret, int nextDest, bool inFinish );
+ void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+ void NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+ void CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish );
+ void CURS( ostream &ret, bool inFinish );
+ void TARGS( ostream &ret, bool inFinish, int targState );
+ void RET( ostream &ret, bool inFinish );
+ void BREAK( ostream &ret, int targState );
+
+ /* Single table-entry emitters, one state or transition at a time. */
+ virtual std::ostream &TO_STATE_ACTION( RedStateAp *state );
+ virtual std::ostream &FROM_STATE_ACTION( RedStateAp *state );
+ virtual std::ostream &EOF_ACTION( RedStateAp *state );
+ virtual std::ostream &TRANS_ACTION( RedTransAp *trans );
+
+ /* Top-level writers for the data tables, EOF code and exec loop. */
+ virtual void writeOutData();
+ virtual void writeOutEOF();
+ virtual void writeOutExec();
+};
+
+/*
+ * CFlatCodeGen
+ *
+ * Joins FlatCodeGen with CCodeGen; adds no members of its own.
+ * NOTE(review): presumably the C-output flavour -- confirm against CCodeGen.
+ */
+struct CFlatCodeGen
+ : public FlatCodeGen, public CCodeGen
+{
+};
+
+/*
+ * DFlatCodeGen
+ *
+ * Joins FlatCodeGen with DCodeGen; adds no members of its own.
+ * NOTE(review): presumably the D-output flavour -- confirm against DCodeGen.
+ */
+struct DFlatCodeGen
+ : public FlatCodeGen, public DCodeGen
+{
+};
+
+#endif /* _FLATCODEGEN_H */
diff --git a/rlcodegen/fsmcodegen.cpp b/rlcodegen/fsmcodegen.cpp
new file mode 100644
index 0000000..b9aa458
--- /dev/null
+++ b/rlcodegen/fsmcodegen.cpp
@@ -0,0 +1,1012 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Eric Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlcodegen.h"
+#include "fsmcodegen.h"
+#include "redfsm.h"
+#include "gendata.h"
+#include <sstream>
+#include <string>
+#include <assert.h>
+
+using std::ostream;
+using std::ostringstream;
+using std::string;
+using std::cerr;
+using std::endl;
+
+
+/* Report whether a NUL-terminated string holds nothing but whitespace
+ * (space, tab, newline, vertical tab, form feed, carriage return). The empty
+ * string qualifies. Code blocks that are only whitespace need not be
+ * output. */
+bool onlyWhitespace( char *str )
+{
+	for ( const char *cur = str; *cur != 0; cur++ ) {
+		switch ( *cur ) {
+			case ' ': case '\t': case '\n':
+			case '\v': case '\f': case '\r':
+				/* Still whitespace, keep scanning. */
+				continue;
+			default:
+				return false;
+		}
+	}
+	return true;
+}
+
+/* Construct a generator with no machine attached: the name, generation data
+ * and reduced machine pointers start out null, out is bound to the stream
+ * that outStream points at, and every analysis flag starts out false.
+ *
+ * outLabelUsed, againLabelUsed and useIndicies are cleared as well so they
+ * never hold indeterminate values. The base class calcIndexSize() has an
+ * empty body, so useIndicies would otherwise never be assigned for a
+ * generator that does not override it. The initializers are listed in member
+ * declaration order. */
+FsmCodeGen::FsmCodeGen( )
+:
+	fsmName(0),
+	cgd(0),
+	redFsm(0),
+	outLabelUsed(false),
+	againLabelUsed(false),
+	out(*outStream),
+	bAnyToStateActions(false),
+	bAnyFromStateActions(false),
+	bAnyRegActions(false),
+	bAnyEofActions(false),
+	bAnyActionGotos(false),
+	bAnyActionCalls(false),
+	bAnyActionRets(false),
+	bAnyRegActionRets(false),
+	bAnyRegActionByValControl(false),
+	bAnyRegNextStmt(false),
+	bAnyRegCurStateRef(false),
+	bAnyRegBreak(false),
+	bAnyLmSwitchError(false),
+	bAnyConditions(false),
+	useIndicies(false)
+{
+}
+
+/* True when the reduced machine's action map holds at least one entry. */
+bool FsmCodeGen::anyActions()
+{
+	const bool mapIsEmpty = redFsm->actionMap.length() <= 0;
+	return ! mapIsEmpty;
+}
+
+/* Walk every state of the reduced machine and bump the reference counters of
+ * each action table it uses, together with the counters of every action
+ * listed in that table. Separate counters are kept for transitions (single,
+ * range and default), to-state, from-state and EOF uses. */
+void FsmCodeGen::findFinalActionRefs()
+{
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		/* Transitions taken on single keys. */
+		for ( RedTransList::Iter single = st->outSingle; single.lte(); single++ ) {
+			if ( single->value->action == 0 )
+				continue;
+
+			single->value->action->numTransRefs += 1;
+			for ( ActionTable::Iter el = single->value->action->key; el.lte(); el++ )
+				el->value->numTransRefs += 1;
+		}
+
+		/* Transitions taken on key ranges. */
+		for ( RedTransList::Iter range = st->outRange; range.lte(); range++ ) {
+			if ( range->value->action == 0 )
+				continue;
+
+			range->value->action->numTransRefs += 1;
+			for ( ActionTable::Iter el = range->value->action->key; el.lte(); el++ )
+				el->value->numTransRefs += 1;
+		}
+
+		/* The default transition, when the state has one with an action. */
+		if ( st->defTrans != 0 && st->defTrans->action != 0 ) {
+			st->defTrans->action->numTransRefs += 1;
+			for ( ActionTable::Iter el = st->defTrans->action->key; el.lte(); el++ )
+				el->value->numTransRefs += 1;
+		}
+
+		/* To-state actions. */
+		if ( st->toStateAction != 0 ) {
+			st->toStateAction->numToStateRefs += 1;
+			for ( ActionTable::Iter el = st->toStateAction->key; el.lte(); el++ )
+				el->value->numToStateRefs += 1;
+		}
+
+		/* From-state actions. */
+		if ( st->fromStateAction != 0 ) {
+			st->fromStateAction->numFromStateRefs += 1;
+			for ( ActionTable::Iter el = st->fromStateAction->key; el.lte(); el++ )
+				el->value->numFromStateRefs += 1;
+		}
+
+		/* EOF actions. */
+		if ( st->eofAction != 0 ) {
+			st->eofAction->numEofRefs += 1;
+			for ( ActionTable::Iter el = st->eofAction->key; el.lte(); el++ )
+				el->value->numEofRefs += 1;
+		}
+	}
+}
+
+/* Number the referenced actions consecutively from zero, in action list
+ * order. Actions whose numRefs() is not positive keep whatever id they
+ * had. */
+void FsmCodeGen::assignActionIds()
+{
+	int nextActionId = 0;
+	for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) {
+		/* Unreferenced actions do not consume an id. */
+		if ( act->numRefs() <= 0 )
+			continue;
+
+		act->actionId = nextActionId;
+		nextActionId += 1;
+	}
+}
+
+/* Compute the maximum values that the generated tables have to hold. Each
+ * limit is reset first and then derived from the condition space list, the
+ * state list and the action table map. maxKey is not touched here. */
+void FsmCodeGen::setValueLimits()
+{
+	/* Lengths and ids. */
+	maxSingleLen = 0;
+	maxRangeLen = 0;
+	maxCondLen = 0;
+	maxCondSpaceId = 0;
+	maxActListId = 0;
+	maxActionLoc = 0;
+	maxActArrItem = 0;
+
+	/* Running offsets. */
+	maxKeyOffset = 0;
+	maxIndexOffset = 0;
+	maxFlatIndexOffset = 0;
+	maxCondOffset = 0;
+	maxCondIndexOffset = 0;
+
+	/* Key spans. */
+	maxSpan = 0;
+	maxCondSpan = 0;
+
+	/* For both of these index 0 is reserved for "no value", so the maximum
+	 * is one more than it would be if numbering started at 0. */
+	maxIndex = redFsm->transSet.length();
+	maxCond = cgd->condSpaceList.length();
+
+	/* nextStateId - 1 is the last state id that was handed out. */
+	maxState = redFsm->nextStateId - 1;
+
+	for ( CondSpaceList::Iter space = cgd->condSpaceList; space.lte(); space++ ) {
+		if ( maxCondSpaceId < space->condSpaceId )
+			maxCondSpaceId = space->condSpaceId;
+	}
+
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		/* Longest condition, single and range lists over all states. */
+		if ( maxCondLen < st->stateCondList.length() )
+			maxCondLen = st->stateCondList.length();
+		if ( maxSingleLen < st->outSingle.length() )
+			maxSingleLen = st->outSingle.length();
+		if ( maxRangeLen < st->outRange.length() )
+			maxRangeLen = st->outRange.length();
+
+		/* Widest condition key span. */
+		if ( st->condList != 0 ) {
+			unsigned long long condSpan = keyOps->span( st->condLowKey, st->condHighKey );
+			if ( maxCondSpan < condSpan )
+				maxCondSpan = condSpan;
+		}
+
+		/* Widest transition key span. */
+		if ( st->transList != 0 ) {
+			unsigned long long keySpan = keyOps->span( st->lowKey, st->highKey );
+			if ( maxSpan < keySpan )
+				maxSpan = keySpan;
+		}
+
+		/* The offsets are running sums over the preceding states. The
+		 * offset that would follow the last state is never used, so the
+		 * last state adds nothing. */
+		if ( st.last() )
+			continue;
+
+		maxCondOffset += st->stateCondList.length();
+		maxKeyOffset += st->outSingle.length() + st->outRange.length()*2;
+		maxIndexOffset += st->outSingle.length() + st->outRange.length() + 1;
+
+		if ( st->condList != 0 )
+			maxCondIndexOffset += keyOps->span( st->condLowKey, st->condHighKey );
+
+		if ( st->transList != 0 )
+			maxFlatIndexOffset += keyOps->span( st->lowKey, st->highKey );
+		maxFlatIndexOffset += 1;
+	}
+
+	for ( ActionTableMap::Iter table = redFsm->actionMap; table.lte(); table++ ) {
+		/* Largest action list id, stored plus one. */
+		if ( maxActListId < table->actListId+1 )
+			maxActListId = table->actListId+1;
+
+		/* Largest location in the action array, stored plus one. */
+		if ( maxActionLoc < table->location+1 )
+			maxActionLoc = table->location+1;
+
+		/* The action array holds both table lengths and action ids, so
+		 * the largest item is the maximum over the two. */
+		if ( maxActArrItem < table->key.length() )
+			maxActArrItem = table->key.length();
+		for ( ActionTable::Iter el = table->key; el.lte(); el++ ) {
+			if ( maxActArrItem < el->value->actionId )
+				maxActArrItem = el->value->actionId;
+		}
+	}
+}
+
+/* Scan an action's inline item tree and raise the machine-wide flags for the
+ * constructs found in it. Child lists are scanned recursively on behalf of
+ * the same action.
+ *
+ * act:        the action that owns the items; its reference counts decide
+ *             which flags may be raised.
+ * inlineList: the items to scan. */
+void FsmCodeGen::analyzeAction( Action *act, InlineList *inlineList )
+{
+	/* A "regular" action is one used on a transition, to-state or
+	 * from-state. */
+	const bool usedAsRegular = act->numTransRefs > 0 ||
+			act->numToStateRefs > 0 || act->numFromStateRefs > 0;
+
+	for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+		/* Control flow statements count only in referenced actions. */
+		if ( act->numRefs() > 0 ) {
+			switch ( item->type ) {
+				case InlineItem::Goto:
+				case InlineItem::GotoExpr:
+					bAnyActionGotos = true;
+					break;
+				case InlineItem::Call:
+				case InlineItem::CallExpr:
+					bAnyActionCalls = true;
+					break;
+				case InlineItem::Ret:
+					bAnyActionRets = true;
+					break;
+				default:
+					break;
+			}
+		}
+
+		if ( usedAsRegular ) {
+			switch ( item->type ) {
+				case InlineItem::Ret:
+					/* A return inside a regular action. */
+					bAnyRegActionRets = true;
+					break;
+				case InlineItem::Next:
+				case InlineItem::NextExpr:
+					/* A next statement inside a regular action. */
+					bAnyRegNextStmt = true;
+					break;
+				case InlineItem::CallExpr:
+				case InlineItem::GotoExpr:
+					/* By-value control inside a regular action. */
+					bAnyRegActionByValControl = true;
+					break;
+				case InlineItem::Curs:
+					/* Current state reference inside a regular action. */
+					bAnyRegCurStateRef = true;
+					break;
+				case InlineItem::Break:
+					bAnyRegBreak = true;
+					break;
+				case InlineItem::LmSwitch:
+					if ( item->handlesError )
+						bAnyLmSwitchError = true;
+					break;
+				default:
+					break;
+			}
+		}
+
+		if ( item->children != 0 )
+			analyzeAction( act, item->children );
+	}
+}
+
+/* Scan an inline item tree and record on the reduced action table whether it
+ * contains a next statement, a current state reference or a break. Child
+ * lists are scanned recursively.
+ *
+ * redAct:     the action table whose flags are set.
+ * inlineList: the items to scan. */
+void FsmCodeGen::analyzeActionList( RedAction *redAct, InlineList *inlineList )
+{
+	for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+		switch ( item->type ) {
+			case InlineItem::Next:
+			case InlineItem::NextExpr:
+				redAct->bAnyNextStmt = true;
+				break;
+			case InlineItem::Curs:
+				redAct->bAnyCurStateRef = true;
+				break;
+			case InlineItem::Break:
+				redAct->bAnyBreakStmt = true;
+				break;
+			default:
+				break;
+		}
+
+		if ( item->children != 0 )
+			analyzeActionList( redAct, item->children );
+	}
+}
+
+/* Gather the facts about the machine that the writers consult later. In
+ * order: count action references, classify the actions, analyze the reduced
+ * action tables, flag per-state properties, assign action ids, compute the
+ * value limits and finally let calcIndexSize() run. */
+void FsmCodeGen::analyzeMachine()
+{
+	/* Establish the reference counts everything below relies on. */
+	findFinalActionRefs();
+
+	for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) {
+		/* Note which kinds of use occur at all. */
+		if ( act->numTransRefs > 0 )
+			bAnyRegActions = true;
+		if ( act->numToStateRefs > 0 )
+			bAnyToStateActions = true;
+		if ( act->numFromStateRefs > 0 )
+			bAnyFromStateActions = true;
+		if ( act->numEofRefs > 0 )
+			bAnyEofActions = true;
+
+		/* Descend into the action's inline tree. */
+		analyzeAction( act, act->inlineList );
+	}
+
+	/* Per-table analysis of the reduced action tables. */
+	for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+		for ( ActionTable::Iter act = redAct->key; act.lte(); act++ )
+			analyzeActionList( redAct, act->value->inlineList );
+	}
+
+	/* Flag the states that have an outgoing transition whose action table
+	 * refers to the current state, and note whether any state carries
+	 * conditions. */
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		/* Single key transitions. */
+		for ( RedTransList::Iter single = st->outSingle; single.lte(); single++ ) {
+			if ( single->value->action == 0 )
+				continue;
+			if ( single->value->action->anyCurStateRef() )
+				st->bAnyRegCurStateRef = true;
+		}
+
+		/* Range transitions. */
+		for ( RedTransList::Iter range = st->outRange; range.lte(); range++ ) {
+			if ( range->value->action == 0 )
+				continue;
+			if ( range->value->action->anyCurStateRef() )
+				st->bAnyRegCurStateRef = true;
+		}
+
+		/* Default transition. */
+		if ( st->defTrans != 0 && st->defTrans->action != 0 ) {
+			if ( st->defTrans->action->anyCurStateRef() )
+				st->bAnyRegCurStateRef = true;
+		}
+
+		if ( st->stateCondList.length() > 0 )
+			bAnyConditions = true;
+	}
+
+	/* Ids go to the actions that turned out to be referenced. */
+	assignActionIds();
+
+	/* Maximums used for choosing array types. */
+	setValueLimits();
+
+	/* Hook for deciding whether to use indicies. */
+	calcIndexSize();
+}
+
+/* Return the size field of the host type that keyOps->typeSubsumes() picks
+ * for maxVal. Asserts that such a type exists. */
+unsigned int FsmCodeGen::arrayTypeSize( unsigned long maxVal )
+{
+	HostType *chosen = keyOps->typeSubsumes( (long long) maxVal );
+	assert( chosen != 0 );
+	return chosen->size;
+}
+
+/* Return the name of the host type that keyOps->typeSubsumes() picks for
+ * maxVal: data1 alone, or data1 and data2 joined by a space when data2 is
+ * set. Asserts that such a type exists. */
+string FsmCodeGen::ARRAY_TYPE( unsigned long maxVal )
+{
+	HostType *chosen = keyOps->typeSubsumes( (long long) maxVal );
+	assert( chosen != 0 );
+
+	string typeName = chosen->data1;
+	if ( chosen->data2 == 0 )
+		return typeName;
+
+	/* Two-word type name. */
+	typeName += " ";
+	typeName += chosen->data2;
+	return typeName;
+}
+
+
+/* Return the machine name held in fsmName as a string. */
+string FsmCodeGen::FSM_NAME()
+{
+	return string( fsmName );
+}
+
+/* Return the id of the reduced machine's start state, formatted as a
+ * decimal integer. */
+string FsmCodeGen::START_STATE_ID()
+{
+	ostringstream ret;
+	ret << redFsm->startState->id;
+	return ret.str();
+}
+
+/* Write the contents of the actions array to out: a leading 0, then for each
+ * action table its length followed by the ids of its actions. Values are
+ * comma separated and a line break is inserted after every eighth value.
+ * Returns out. */
+std::ostream &FsmCodeGen::ACTIONS_ARRAY()
+{
+	/* Number of values written so far; the leading zero is the first. */
+	int written = 1;
+	out << "\t0, ";
+
+	for ( ActionTableMap::Iter table = redFsm->actionMap; table.lte(); table++ ) {
+		/* The length prefix is written with its separator unconditionally. */
+		out << table->key.length() << ", ";
+		if ( written % 8 == 7 )
+			out << "\n\t";
+		written += 1;
+
+		for ( ActionTable::Iter el = table->key; el.lte(); el++ ) {
+			out << el->value->actionId;
+
+			/* No separator after the final id of the final table. */
+			const bool finalValue = table.last() && el.last();
+			if ( ! finalValue )
+				out << ", ";
+
+			if ( written % 8 == 7 )
+				out << "\n\t";
+			written += 1;
+		}
+	}
+	out << "\n";
+	return out;
+}
+
+
+/* Return the expression for the current state variable. With a user
+ * supplied expression (cgd->curStateExpr) the result is that expression in
+ * parentheses; otherwise it is the access prefix followed by "cs". */
+string FsmCodeGen::CS()
+{
+	ostringstream ret;
+	if ( cgd->curStateExpr == 0 ) {
+		/* Default: plain variable behind the access prefix. */
+		ret << ACCESS() << "cs";
+		return ret.str();
+	}
+
+	/* User supplied expression, parenthesized. */
+	ret << "(";
+	INLINE_LIST( ret, cgd->curStateExpr, 0, false );
+	ret << ")";
+	return ret.str();
+}
+
+/* Return the text of the access expression (cgd->accessExpr), or an empty
+ * string when none was given. */
+string FsmCodeGen::ACCESS()
+{
+	ostringstream ret;
+	if ( cgd->accessExpr == 0 )
+		return ret.str();
+
+	INLINE_LIST( ret, cgd->accessExpr, 0, false );
+	return ret.str();
+}
+
+/* Return the key expression for the machine as a whole: "_widec" when any
+ * state has conditions, the plain key expression otherwise. */
+string FsmCodeGen::GET_WIDE_KEY()
+{
+	return anyConditions() ? string( "_widec" ) : GET_KEY();
+}
+
+/* Return the key expression for one state: "_widec" when the state's
+ * condition list is non-empty, the plain key expression otherwise. */
+string FsmCodeGen::GET_WIDE_KEY( RedStateAp *state )
+{
+	const bool stateHasConds = state->stateCondList.length() > 0;
+	return stateHasConds ? string( "_widec" ) : GET_KEY();
+}
+
+/* Return the expression that reads the current key. With a user supplied
+ * expression (cgd->getKeyExpr) the result is that expression in parentheses;
+ * otherwise it is a dereference of P(). */
+string FsmCodeGen::GET_KEY()
+{
+	ostringstream ret;
+	if ( cgd->getKeyExpr == 0 ) {
+		/* Default: simple dereference. */
+		ret << "(*" << P() << ")";
+		return ret.str();
+	}
+
+	/* User supplied expression, parenthesized. */
+	ret << "(";
+	INLINE_LIST( ret, cgd->getKeyExpr, 0, false );
+	ret << ")";
+	return ret.str();
+}
+
+/* Return a string of level tab characters, empty when level is zero or
+ * negative. Used to indent the nested binary search nicely. */
+string FsmCodeGen::TABS( int level )
+{
+	return level > 0 ? string( level, '\t' ) : string();
+}
+
+/* Format a key for the generated code. The value is written as-is unless
+ * the alphabet is unsigned and the host language wants unsigned literals
+ * marked explicitly; in that case it is written as an unsigned long with a
+ * 'u' suffix. */
+string FsmCodeGen::KEY( Key key )
+{
+	ostringstream ret;
+	if ( !keyOps->isSigned && hostLang->explicitUnsigned )
+		ret << (unsigned long) key.getVal() << 'u';
+	else
+		ret << key.getVal();
+	return ret.str();
+}
+
+/* Emit the statement that assigns P() from the item's child expression,
+ * minus one. The expression is wrapped in double brackets for D: a single
+ * word inside single brackets would be read as a C-style cast by the D
+ * compiler. */
+void FsmCodeGen::EXEC( ostream &ret, InlineItem *item, int targState, int inFinish )
+{
+	ret << "{";
+	ret << P();
+	ret << " = ((";
+	INLINE_LIST( ret, item->children, targState, inFinish );
+	ret << "))-1;}";
+}
+
+/* Tokend version of exec: emit the statement that assigns TOKEND() from the
+ * item's child expression. The expression is wrapped in double brackets for
+ * D: a single word inside single brackets would be read as a C-style cast by
+ * the D compiler. */
+void FsmCodeGen::EXECTE( ostream &ret, InlineItem *item, int targState, int inFinish )
+{
+	ret << "{";
+	ret << TOKEND();
+	ret << " = ((";
+	INLINE_LIST( ret, item->children, targState, inFinish );
+	ret << "));}";
+}
+
+
+/* Emit a switch over the act variable with one case per child of item.
+ *
+ * The switch expression is produced by ACT(), the same helper SET_ACT and
+ * INIT_ACT use, so the variable that is tested carries the access prefix
+ * just like the variable that is assigned. Without an access expression the
+ * emitted text is unchanged: "act".
+ *
+ * ret:       stream receiving the generated code.
+ * item:      the switch item; its children supply the cases.
+ * targState: passed through to the nested inline lists.
+ * inFinish:  passed through to the nested inline lists and to GOTO. */
+void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item, 
+		int targState, int inFinish )
+{
+	ret << 
+		" switch( " << ACT() << " ) {\n";
+
+	/* If the switch handles error then we also forced the error state. It
+	 * will exist. */
+	if ( item->handlesError ) {
+		ret << " case 0: " << TOKEND() << " = " << TOKSTART() << "; ";
+		GOTO( ret, redFsm->errState->id, inFinish );
+		ret << "\n";
+	}
+
+	for ( InlineList::Iter lma = *item->children; lma.lte(); lma++ ) {
+		/* Case label. */
+		ret << " case " << lma->lmId << ":\n";
+
+		/* The block for this case, opened and closed here. */
+		ret << " {";
+		INLINE_LIST( ret, lma->children, targState, inFinish );
+		ret << "}\n";
+
+		ret << " break;\n";
+	}
+
+	/* Default required for D code. */
+	ret << 
+		" default: break;\n"
+		" }\n"
+		"\t";
+}
+
+/* Emit the statement that assigns the item's lmId to the act variable. */
+void FsmCodeGen::SET_ACT( ostream &ret, InlineItem *item )
+{
+	ret << ACT();
+	ret << " = " << item->lmId << ";";
+}
+
+/* Emit the statement that sets tokend: TOKEND() = P(), plus the item's
+ * offset when that is non-zero.
+ *
+ * Every piece is written to ret, so the statement stays in one piece when
+ * ret is not the generator's own out stream (an ostringstream, for
+ * example).
+ *
+ * ret:  stream receiving the generated code.
+ * item: supplies the offset. */
+void FsmCodeGen::SET_TOKEND( ostream &ret, InlineItem *item )
+{
+	ret << TOKEND() << " = " << P();
+	if ( item->offset != 0 ) 
+		ret << "+" << item->offset;
+	ret << ";";
+}
+
+/* Emit the tokend variable. The item argument is not consulted. */
+void FsmCodeGen::GET_TOKEND( ostream &ret, InlineItem *item )
+{
+	const string tokendVar = TOKEND();
+	ret << tokendVar;
+}
+
+/* Emit the statement that resets tokstart to the language's null item. The
+ * item argument is not consulted. */
+void FsmCodeGen::INIT_TOKSTART( ostream &ret, InlineItem *item )
+{
+	ret << TOKSTART();
+	ret << " = " << NULL_ITEM() << ";";
+}
+
+/* Emit the statement that resets the act variable to 0. The item argument
+ * is not consulted. */
+void FsmCodeGen::INIT_ACT( ostream &ret, InlineItem *item )
+{
+	ret << ACT();
+	ret << " = 0;";
+}
+
+/* Emit the statement that sets tokstart to P(). The item argument is not
+ * consulted. */
+void FsmCodeGen::SET_TOKSTART( ostream &ret, InlineItem *item )
+{
+	ret << TOKSTART();
+	ret << " = " << P() << ";";
+}
+
+/* Emit the item's children as a brace-enclosed block. Nothing at all is
+ * written when the child list is empty. */
+void FsmCodeGen::SUB_ACTION( ostream &ret, InlineItem *item, 
+		int targState, bool inFinish )
+{
+	if ( item->children->length() <= 0 )
+		return;
+
+	/* Open the block, write its contents, close it. */
+	ret << "{";
+	INLINE_LIST( ret, item->children, targState, inFinish );
+	ret << "}";
+}
+
+
+/* Write out an inline item list. Each item is dispatched on its type: some
+ * are written directly, the rest are handed to helper functions, several of
+ * which are virtual and language or style specific.
+ *
+ * ret:        stream receiving the generated code.
+ * inlineList: the items to write.
+ * targState:  forwarded to the helpers that take a target state.
+ * inFinish:   forwarded to the helpers that take it. */
+void FsmCodeGen::INLINE_LIST( ostream &ret, InlineList *inlineList, 
+		int targState, bool inFinish )
+{
+	InlineList::Iter item = *inlineList;
+	while ( item.lte() ) {
+		switch ( item->type ) {
+		/* Items written directly. */
+		case InlineItem::Text:   ret << item->data; break;
+		case InlineItem::PChar:  ret << P(); break;
+		case InlineItem::Char:   ret << GET_KEY(); break;
+		case InlineItem::Hold:   ret << P() << "--;"; break;
+		case InlineItem::HoldTE: ret << TOKEND() << "--;"; break;
+		case InlineItem::Entry:  ret << item->targState->id; break;
+
+		/* Control flow to a fixed state. */
+		case InlineItem::Goto: GOTO( ret, item->targState->id, inFinish ); break;
+		case InlineItem::Call: CALL( ret, item->targState->id, targState, inFinish ); break;
+		case InlineItem::Next: NEXT( ret, item->targState->id, inFinish ); break;
+		case InlineItem::Ret:  RET( ret, inFinish ); break;
+
+		/* Control flow to a computed state. */
+		case InlineItem::GotoExpr: GOTO_EXPR( ret, item, inFinish ); break;
+		case InlineItem::CallExpr: CALL_EXPR( ret, item, targState, inFinish ); break;
+		case InlineItem::NextExpr: NEXT_EXPR( ret, item, inFinish ); break;
+
+		/* Exec and state queries. */
+		case InlineItem::Exec:   EXEC( ret, item, targState, inFinish ); break;
+		case InlineItem::ExecTE: EXECTE( ret, item, targState, inFinish ); break;
+		case InlineItem::Curs:   CURS( ret, inFinish ); break;
+		case InlineItem::Targs:  TARGS( ret, inFinish, targState ); break;
+
+		/* Longest-match items. */
+		case InlineItem::LmSwitch:       LM_SWITCH( ret, item, targState, inFinish ); break;
+		case InlineItem::LmSetActId:     SET_ACT( ret, item ); break;
+		case InlineItem::LmSetTokEnd:    SET_TOKEND( ret, item ); break;
+		case InlineItem::LmGetTokEnd:    GET_TOKEND( ret, item ); break;
+		case InlineItem::LmInitTokStart: INIT_TOKSTART( ret, item ); break;
+		case InlineItem::LmInitAct:      INIT_ACT( ret, item ); break;
+		case InlineItem::LmSetTokStart:  SET_TOKSTART( ret, item ); break;
+
+		/* Nested block and break. */
+		case InlineItem::SubAction: SUB_ACTION( ret, item, targState, inFinish ); break;
+		case InlineItem::Break:     BREAK( ret, targState ); break;
+		}
+		item++;
+	}
+}
+/* Return path with every backslash doubled, for use inside line directives.
+ * All other characters are copied unchanged. */
+string FsmCodeGen::LDIR_PATH( char *path )
+{
+	string escaped;
+	for ( const char *pc = path; *pc != 0; pc++ ) {
+		if ( *pc == '\\' )
+			escaped += "\\\\";
+		else
+			escaped += *pc;
+	}
+	return escaped;
+}
+
+/* Emit one action: a line directive for the action's source line, followed
+ * by the action's inline list as a tab-indented, brace-enclosed block. */
+void FsmCodeGen::ACTION( ostream &ret, Action *action, int targState, bool inFinish )
+{
+	/* Line info pointing back into the source file. */
+	lineDirective( ret, cgd->fileName, action->loc.line );
+
+	/* The block itself. */
+	ret << "\t";
+	ret << "{";
+	INLINE_LIST( ret, action->inlineList, targState, inFinish );
+	ret << "}";
+	ret << "\n";
+}
+
+/* Emit one condition: a newline, a line directive for the condition's
+ * source line, then the condition's inline list with target state 0 and
+ * inFinish false. */
+void FsmCodeGen::CONDITION( ostream &ret, Action *condition )
+{
+	const int noTargState = 0;
+
+	ret << "\n";
+	lineDirective( ret, cgd->fileName, condition->loc.line );
+	INLINE_LIST( ret, condition->inlineList, noTargState, false );
+}
+
+/* Return the id of the error state as a string, or "-1" when the reduced
+ * machine has no error state. */
+string FsmCodeGen::ERROR_STATE()
+{
+	ostringstream ret;
+	if ( redFsm->errState == 0 )
+		ret << "-1";
+	else
+		ret << redFsm->errState->id;
+	return ret.str();
+}
+
+/* Return the id of the first final state as a string. When the reduced
+ * machine records no first final state, nextStateId is returned instead. */
+string FsmCodeGen::FIRST_FINAL_STATE()
+{
+	ostringstream ret;
+	if ( redFsm->firstFinState == 0 )
+		ret << redFsm->nextStateId;
+	else
+		ret << redFsm->firstFinState->id;
+	return ret.str();
+}
+
+/* Write the initialization block to out: the current state is set to the
+ * start value, the stack top is zeroed when any action calls or returns, and
+ * tokstart, tokend and act are reset when the machine has a longest-match
+ * construct. */
+void FsmCodeGen::writeOutInit()
+{
+	out << " {\n";
+	out << "\t" << CS() << " = " << START() << ";\n";
+
+	/* Calls and returns use the stack, so its top needs a defined value. */
+	const bool usesStack = anyActionCalls() || anyActionRets();
+	if ( usesStack )
+		out << "\t" << TOP() << " = 0;\n";
+
+	if ( cgd->hasLongestMatch ) {
+		out << " " << TOKSTART() << " = " << NULL_ITEM() << ";\n";
+		out << " " << TOKEND() << " = " << NULL_ITEM() << ";\n";
+		out << " " << ACT() << " = 0;\n";
+	}
+	out << " }\n";
+}
+
+/* Return the prefix for generated data names: the machine name followed by
+ * an underscore when cgd->dataPrefix is set, an empty string otherwise. */
+string FsmCodeGen::DATA_PREFIX()
+{
+	string prefix;
+	if ( cgd->dataPrefix )
+		prefix = FSM_NAME() + "_";
+	return prefix;
+}
+
+/* Return the name of the alphabet type: data1 alone, or data1 and data2
+ * joined by a space when data2 is set. The second word is appended the same
+ * way ARRAY_TYPE and WIDE_ALPH_TYPE append theirs. */
+string FsmCodeGen::ALPH_TYPE()
+{
+	string ret = keyOps->alphType->data1;
+	if ( keyOps->alphType->data2 != 0 ) {
+		ret += " ";
+		ret += keyOps->alphType->data2;
+	}
+	return ret;
+}
+
+/* Return the name of the type used for wide keys. The alphabet type is
+ * reused when maxKey does not exceed keyOps->maxKey; otherwise the type
+ * comes from keyOps->typeSubsumes() for maxKey, which is asserted to
+ * succeed. */
+string FsmCodeGen::WIDE_ALPH_TYPE()
+{
+	/* The alphabet type is already wide enough. */
+	if ( maxKey <= keyOps->maxKey )
+		return ALPH_TYPE();
+
+	long long widestKey = maxKey.getLongLong();
+	HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, widestKey );
+	assert( wideType != 0 );
+
+	string typeName = wideType->data1;
+	if ( wideType->data2 != 0 ) {
+		typeName += " ";
+		typeName += wideType->data2;
+	}
+	return typeName;
+}
+
+
+/*
+ * Language-specific, but style-independent, code generator functions.
+ */
+
+/* C: qualifier text for pointers to constant data, with a trailing space. */
+string CCodeGen::PTR_CONST()
+{
+	return string( "const " );
+}
+
+/* C: write the opening of a static const array definition with the given
+ * element type and name. Returns out. */
+std::ostream &CCodeGen::OPEN_ARRAY( string type, string name )
+{
+	return out << "static const " << type << " " << name << "[] = {\n";
+}
+
+/* C: write the closing of an array definition. Returns out. */
+std::ostream &CCodeGen::CLOSE_ARRAY()
+{
+	out << "};\n";
+	return out;
+}
+
+/* C: write the declarator of a static const variable; no initializer or
+ * terminator is written. Returns out. */
+std::ostream &CCodeGen::STATIC_VAR( string type, string name )
+{
+	return out << "static const " << type << " " << name;
+}
+
+/* C: name of the unsigned integer type. */
+string CCodeGen::UINT( )
+{
+	return string( "unsigned int" );
+}
+
+/* C: expression for an offset into an array, written as pointer
+ * addition. */
+string CCodeGen::ARR_OFF( string ptr, string offset )
+{
+	string expr = ptr;
+	expr += " + ";
+	expr += offset;
+	return expr;
+}
+
+/* C: cast to the given type, written as the type in parentheses. */
+string CCodeGen::CAST( string type )
+{
+	string expr( "(" );
+	expr += type;
+	expr += ")";
+	return expr;
+}
+
+/* C: text used for the null item. */
+string CCodeGen::NULL_ITEM()
+{
+	return string( "0" );
+}
+
+/* C: pointer declarator text, with the space before the star. */
+string CCodeGen::POINTER()
+{
+	return string( " *" );
+}
+
+/* C: nothing is written for a switch default. Returns out untouched. */
+std::ostream &CCodeGen::SWITCH_DEFAULT()
+{
+	std::ostream &unchanged = out;
+	return unchanged;
+}
+
+/* C: control flow prefix, which is empty. */
+string CCodeGen::CTRL_FLOW()
+{
+	return string( "" );
+}
+
+/*
+ * D Specific
+ */
+
+/* D: text used for the null item. */
+string DCodeGen::NULL_ITEM()
+{
+	return string( "null" );
+}
+
+/* D: pointer declarator text, with the space after the star. Multiple items
+ * separated by commas can also be pointer types. */
+string DCodeGen::POINTER()
+{
+	return string( "* " );
+}
+
+/* D: qualifier text for pointers to constant data, which is empty. */
+string DCodeGen::PTR_CONST()
+{
+	return string( "" );
+}
+
+/* D: write the opening of a static const array definition with the given
+ * element type and name. Returns out. */
+std::ostream &DCodeGen::OPEN_ARRAY( string type, string name )
+{
+	return out << "static const " << type << "[] " << name << " = [\n";
+}
+
+/* D: write the closing of an array definition. Returns out. */
+std::ostream &DCodeGen::CLOSE_ARRAY()
+{
+	out << "];\n";
+	return out;
+}
+
+/* D: write the declarator of a static const variable; no initializer or
+ * terminator is written. Returns out. */
+std::ostream &DCodeGen::STATIC_VAR( string type, string name )
+{
+	return out << "static const " << type << " " << name;
+}
+
+/* D: expression for an offset into an array, written as the address of an
+ * indexed element. */
+string DCodeGen::ARR_OFF( string ptr, string offset )
+{
+	string expr( "&" );
+	expr += ptr;
+	expr += "[";
+	expr += offset;
+	expr += "]";
+	return expr;
+}
+
+/* D: cast to the given type, written with the cast keyword. */
+string DCodeGen::CAST( string type )
+{
+	string expr( "cast(" );
+	expr += type;
+	expr += ")";
+	return expr;
+}
+
+/* D: name of the unsigned integer type. */
+string DCodeGen::UINT( )
+{
+	return string( "uint" );
+}
+
+/* D: write an explicit default case that only breaks. Returns out. */
+std::ostream &DCodeGen::SWITCH_DEFAULT()
+{
+	return out << " default: break;\n";
+}
+
+/* D: control flow prefix, an always-true condition with a trailing space. */
+string DCodeGen::CTRL_FLOW()
+{
+	return string( "if (true) " );
+}
+
+
+/*
+ * Java Specific
+ */
+
+/* Java: not used in Java code. Calling it trips an assertion; the value
+ * after the assertion is "final". */
+string JavaCodeGen::PTR_CONST()
+{
+	assert( false );
+	return string( "final" );
+}
+
+/* Java: write the opening of a static final array definition with the given
+ * element type and name. Returns out. */
+std::ostream &JavaCodeGen::OPEN_ARRAY( string type, string name )
+{
+	return out << "static final " << type << "[] " << name << " = {\n";
+}
+
+/* Java: write the closing of an array definition. Returns out. */
+std::ostream &JavaCodeGen::CLOSE_ARRAY()
+{
+	out << "};\n";
+	return out;
+}
+
+/* Java: write the declarator of a static final variable; no initializer or
+ * terminator is written. Returns out. */
+std::ostream &JavaCodeGen::STATIC_VAR( string type, string name )
+{
+	return out << "static final " << type << " " << name;
+}
+
+/* Java: not used. Calling it trips an assertion; the value after the
+ * assertion is "long". */
+string JavaCodeGen::UINT( )
+{
+	assert( false );
+	return string( "long" );
+}
+
+/* Java: expression for an offset into an array, written as an addition. */
+string JavaCodeGen::ARR_OFF( string ptr, string offset )
+{
+	string expr = ptr;
+	expr += " + ";
+	expr += offset;
+	return expr;
+}
+
+/* Java: cast to the given type, written as the type in parentheses. */
+string JavaCodeGen::CAST( string type )
+{
+	string expr( "(" );
+	expr += type;
+	expr += ")";
+	return expr;
+}
+
+/* Java: text used for the null item. Integers stand in for pointers in
+ * Java, so the null item is -1. */
+string JavaCodeGen::NULL_ITEM()
+{
+	return string( "-1" );
+}
+
+/* Java: not used. Calling it trips an assertion; the value after the
+ * assertion is " *". */
+string JavaCodeGen::POINTER()
+{
+	assert( false );
+	return string( " *" );
+}
+
+/* Java: nothing is written for a switch default. Returns out untouched. */
+std::ostream &JavaCodeGen::SWITCH_DEFAULT()
+{
+	std::ostream &unchanged = out;
+	return unchanged;
+}
+
+/* Java: return the expression that reads the current key. With a user
+ * supplied expression (cgd->getKeyExpr) the result is that expression in
+ * parentheses; otherwise it indexes the array named data with P(). */
+string JavaCodeGen::GET_KEY()
+{
+	ostringstream ret;
+	if ( cgd->getKeyExpr == 0 ) {
+		/* Default: array element at the current position. */
+		ret << "data[" << P() << "]";
+		return ret.str();
+	}
+
+	/* User supplied expression, parenthesized. */
+	ret << "(";
+	INLINE_LIST( ret, cgd->getKeyExpr, 0, false );
+	ret << ")";
+	return ret.str();
+}
+
+/* Java: control flow prefix, an always-true condition with a trailing
+ * space. */
+string JavaCodeGen::CTRL_FLOW()
+{
+	return string( "if (true) " );
+}
+
diff --git a/rlcodegen/fsmcodegen.h b/rlcodegen/fsmcodegen.h
new file mode 100644
index 0000000..777c6fd
--- /dev/null
+++ b/rlcodegen/fsmcodegen.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Eric Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _FSMCODEGEN_H
+#define _FSMCODEGEN_H
+
+#include <iostream>
+#include <string>
+#include <stdio.h>
+#include "common.h"
+
+using std::string;
+using std::ostream;
+
+/* Integer array line length. */
+#define IALL 8
+
+/* Forwards. */
+struct RedFsmAp;
+struct RedStateAp;
+struct CodeGenData;
+struct Action;
+struct NameInst;
+struct InlineItem;
+struct InlineList;
+struct RedAction;
+struct LongestMatch;
+struct LongestMatchPart;
+
+/* Format an int as a decimal string.
+ *
+ * The scratch buffer is sized from sizeof(int) instead of a fixed byte
+ * count, so it does not rely on int being 32 bits wide. With 8-bit bytes,
+ * three decimal digits per byte is an upper bound on the number of digits;
+ * the extra two bytes hold a minus sign and the terminating NUL. */
+inline string itoa( int i )
+{
+	char buf[3 * sizeof(int) + 2];
+	sprintf( buf, "%i", i );
+	return buf;
+}
+
+/*
+ * class FsmCodeGen
+ */
+/* Base class of the code generators. It holds the machine being generated,
+ * the output stream, the analysis flags and value limits, and the helpers
+ * that produce names and fragments of generated code. Language and style
+ * specific pieces are pure virtual. */
+class FsmCodeGen
+{
+public:
+ FsmCodeGen();
+ virtual ~FsmCodeGen() {}
+
+ /* Output entry points. Only writeOutInit has a definition in the base
+  * class. */
+ virtual void writeOutData() = 0;
+ virtual void writeOutInit();
+ virtual void writeOutExec() = 0;
+ virtual void writeOutEOF() = 0;
+
+ /* Gather various info on the machine. */
+ void analyzeAction( Action *act, InlineList *inlineList );
+ void analyzeActionList( RedAction *redAct, InlineList *inlineList );
+ void analyzeMachine();
+
+protected:
+ friend struct CodeGenData;
+
+ string FSM_NAME();
+ string START_STATE_ID();
+ ostream &ACTIONS_ARRAY();
+ string GET_WIDE_KEY();
+ string GET_WIDE_KEY( RedStateAp *state );
+ string TABS( int level );
+ string KEY( Key key );
+ string LDIR_PATH( char *path );
+ void ACTION( ostream &ret, Action *action, int targState, bool inFinish );
+ void CONDITION( ostream &ret, Action *condition );
+ string ALPH_TYPE();
+ string WIDE_ALPH_TYPE();
+ string ARRAY_TYPE( unsigned long maxVal );
+
+ /* Language specific text; GET_KEY has a default definition, the others
+  * are pure virtual. */
+ virtual string ARR_OFF( string ptr, string offset ) = 0;
+ virtual string CAST( string type ) = 0;
+ virtual string UINT() = 0;
+ virtual string NULL_ITEM() = 0;
+ virtual string POINTER() = 0;
+ virtual string GET_KEY();
+ virtual ostream &SWITCH_DEFAULT() = 0;
+
+ /* Fixed variable names used in the generated code. */
+ string P() { return "p"; }
+ string PE() { return "pe"; }
+
+ /* Variable names formed from ACCESS() followed by a fixed name. */
+ string ACCESS();
+ string CS();
+ string STACK() { return ACCESS() + "stack"; }
+ string TOP() { return ACCESS() + "top"; }
+ string TOKSTART() { return ACCESS() + "tokstart"; }
+ string TOKEND() { return ACCESS() + "tokend"; }
+ string ACT() { return ACCESS() + "act"; }
+
+ /* Names of generated data. The first group is "_" + DATA_PREFIX() + a
+  * fixed suffix; START() through CTXDATA() omit the leading underscore. */
+ string DATA_PREFIX();
+ string PM() { return "_" + DATA_PREFIX() + "partition_map"; }
+ string C() { return "_" + DATA_PREFIX() + "cond_spaces"; }
+ string CK() { return "_" + DATA_PREFIX() + "cond_keys"; }
+ string K() { return "_" + DATA_PREFIX() + "trans_keys"; }
+ string I() { return "_" + DATA_PREFIX() + "indicies"; }
+ string CO() { return "_" + DATA_PREFIX() + "cond_offsets"; }
+ string KO() { return "_" + DATA_PREFIX() + "key_offsets"; }
+ string IO() { return "_" + DATA_PREFIX() + "index_offsets"; }
+ string CL() { return "_" + DATA_PREFIX() + "cond_lengths"; }
+ string SL() { return "_" + DATA_PREFIX() + "single_lengths"; }
+ string RL() { return "_" + DATA_PREFIX() + "range_lengths"; }
+ string A() { return "_" + DATA_PREFIX() + "actions"; }
+ string TA() { return "_" + DATA_PREFIX() + "trans_actions_wi"; }
+ string TT() { return "_" + DATA_PREFIX() + "trans_targs_wi"; }
+ string TSA() { return "_" + DATA_PREFIX() + "to_state_actions"; }
+ string FSA() { return "_" + DATA_PREFIX() + "from_state_actions"; }
+ string EA() { return "_" + DATA_PREFIX() + "eof_actions"; }
+ string SP() { return "_" + DATA_PREFIX() + "key_spans"; }
+ string CSP() { return "_" + DATA_PREFIX() + "cond_key_spans"; }
+ string START() { return DATA_PREFIX() + "start"; }
+ string ERROR() { return DATA_PREFIX() + "error"; }
+ string FIRST_FINAL() { return DATA_PREFIX() + "first_final"; }
+ string CTXDATA() { return DATA_PREFIX() + "ctxdata"; }
+
+ /* Inline item writers. INLINE_LIST dispatches on the item type; the pure
+  * virtual ones are supplied by derived classes. */
+ void INLINE_LIST( ostream &ret, InlineList *inlineList, int targState, bool inFinish );
+ virtual void GOTO( ostream &ret, int gotoDest, bool inFinish ) = 0;
+ virtual void CALL( ostream &ret, int callDest, int targState, bool inFinish ) = 0;
+ virtual void NEXT( ostream &ret, int nextDest, bool inFinish ) = 0;
+ virtual void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ) = 0;
+ virtual void NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ) = 0;
+ virtual void CALL_EXPR( ostream &ret, InlineItem *ilItem, 
+ int targState, bool inFinish ) = 0;
+ virtual void RET( ostream &ret, bool inFinish ) = 0;
+ virtual void BREAK( ostream &ret, int targState ) = 0;
+ virtual void CURS( ostream &ret, bool inFinish ) = 0;
+ virtual void TARGS( ostream &ret, bool inFinish, int targState ) = 0;
+ /* NOTE(review): the next three take inFinish as int while the other
+  * writers take bool. */
+ void EXEC( ostream &ret, InlineItem *item, int targState, int inFinish );
+ void EXECTE( ostream &ret, InlineItem *item, int targState, int inFinish );
+ void LM_SWITCH( ostream &ret, InlineItem *item, int targState, int inFinish );
+ void SET_ACT( ostream &ret, InlineItem *item );
+ void INIT_TOKSTART( ostream &ret, InlineItem *item );
+ void INIT_ACT( ostream &ret, InlineItem *item );
+ void SET_TOKSTART( ostream &ret, InlineItem *item );
+ void SET_TOKEND( ostream &ret, InlineItem *item );
+ void GET_TOKEND( ostream &ret, InlineItem *item );
+ void SUB_ACTION( ostream &ret, InlineItem *item, 
+ int targState, bool inFinish );
+
+ string ERROR_STATE();
+ string FIRST_FINAL_STATE();
+
+ virtual string PTR_CONST() = 0;
+ virtual ostream &OPEN_ARRAY( string type, string name ) = 0;
+ virtual ostream &CLOSE_ARRAY() = 0;
+ virtual ostream &STATIC_VAR( string type, string name ) = 0;
+
+ virtual string CTRL_FLOW() = 0;
+
+ unsigned int arrayTypeSize( unsigned long maxVal );
+
+ /* Accessors for the analysis flags; all but anyActions() return the
+  * matching bAny* member. */
+ bool anyActions();
+ bool anyToStateActions() { return bAnyToStateActions; }
+ bool anyFromStateActions() { return bAnyFromStateActions; }
+ bool anyRegActions() { return bAnyRegActions; }
+ bool anyEofActions() { return bAnyEofActions; }
+ bool anyActionGotos() { return bAnyActionGotos; }
+ bool anyActionCalls() { return bAnyActionCalls; }
+ bool anyActionRets() { return bAnyActionRets; }
+ bool anyRegActionRets() { return bAnyRegActionRets; }
+ bool anyRegActionByValControl() { return bAnyRegActionByValControl; }
+ bool anyRegNextStmt() { return bAnyRegNextStmt; }
+ bool anyRegCurStateRef() { return bAnyRegCurStateRef; }
+ bool anyRegBreak() { return bAnyRegBreak; }
+ bool anyLmSwitchError() { return bAnyLmSwitchError; }
+ bool anyConditions() { return bAnyConditions; }
+
+ /* Set up labelNeeded flag for each state. Differs for each goto style so
+ * is virtual. */
+ virtual void setLabelsNeeded() {}
+
+ /* Determine if we should use indicies. The base version does nothing. */
+ virtual void calcIndexSize() {}
+
+ void findFinalActionRefs();
+ void assignActionIds();
+ void setValueLimits();
+
+ /* The machine being generated: its name, the code generation data and
+  * the reduced machine. */
+ char *fsmName;
+ CodeGenData *cgd;
+ RedFsmAp *redFsm;
+
+ bool outLabelUsed;
+ bool againLabelUsed;
+
+protected:
+ /* Stream that the generated code is written to. */
+ ostream &out;
+
+ /* Analysis flags, read through the any*() accessors above. */
+ bool bAnyToStateActions;
+ bool bAnyFromStateActions;
+ bool bAnyRegActions;
+ bool bAnyEofActions;
+ bool bAnyActionGotos;
+ bool bAnyActionCalls;
+ bool bAnyActionRets;
+ bool bAnyRegActionRets;
+ bool bAnyRegActionByValControl;
+ bool bAnyRegNextStmt;
+ bool bAnyRegCurStateRef;
+ bool bAnyRegBreak;
+ bool bAnyLmSwitchError;
+ bool bAnyConditions;
+
+ /* Value limits. setValueLimits() assigns all of them except maxKey. */
+ int maxState;
+ int maxSingleLen;
+ int maxRangeLen;
+ int maxKeyOffset;
+ int maxIndexOffset;
+ int maxIndex;
+ int maxActListId;
+ int maxActionLoc;
+ int maxActArrItem;
+ unsigned long long maxSpan;
+ unsigned long long maxCondSpan;
+ int maxFlatIndexOffset;
+ Key maxKey;
+ int maxCondOffset;
+ int maxCondLen;
+ int maxCondSpaceId;
+ int maxCondIndexOffset;
+ int maxCond;
+
+ bool useIndicies;
+};
+
+/* C flavour of the language specific hooks. Every member shares its name
+ * with a pure virtual member of FsmCodeGen, which is inherited virtually. */
+class CCodeGen : virtual public FsmCodeGen
+{
+public:
+ virtual string NULL_ITEM();
+ virtual string POINTER();
+ virtual ostream &SWITCH_DEFAULT();
+ virtual ostream &OPEN_ARRAY( string type, string name );
+ virtual ostream &CLOSE_ARRAY();
+ virtual ostream &STATIC_VAR( string type, string name );
+ virtual string ARR_OFF( string ptr, string offset );
+ virtual string CAST( string type );
+ virtual string UINT();
+ virtual string PTR_CONST();
+ virtual string CTRL_FLOW();
+};
+
+/* D flavour of the language specific hooks. Every member shares its name
+ * with a pure virtual member of FsmCodeGen, which is inherited virtually. */
+class DCodeGen : virtual public FsmCodeGen
+{
+public:
+ virtual string NULL_ITEM();
+ virtual string POINTER();
+ virtual ostream &SWITCH_DEFAULT();
+ virtual ostream &OPEN_ARRAY( string type, string name );
+ virtual ostream &CLOSE_ARRAY();
+ virtual ostream &STATIC_VAR( string type, string name );
+ virtual string ARR_OFF( string ptr, string offset );
+ virtual string CAST( string type );
+ virtual string UINT();
+ virtual string PTR_CONST();
+ virtual string CTRL_FLOW();
+};
+
+/* Java flavour of the language specific hooks, inheriting FsmCodeGen
+ * virtually. In addition to the pure virtual members it also redeclares
+ * GET_KEY, which has a default definition in FsmCodeGen. */
+class JavaCodeGen : virtual public FsmCodeGen
+{
+public:
+ virtual string NULL_ITEM();
+ virtual string POINTER();
+ virtual ostream &SWITCH_DEFAULT();
+ virtual ostream &OPEN_ARRAY( string type, string name );
+ virtual ostream &CLOSE_ARRAY();
+ virtual ostream &STATIC_VAR( string type, string name );
+ virtual string ARR_OFF( string ptr, string offset );
+ virtual string CAST( string type );
+ virtual string UINT();
+ virtual string PTR_CONST();
+ virtual string GET_KEY();
+ virtual string CTRL_FLOW();
+};
+
+#endif /* _FSMCODEGEN_H */
diff --git a/rlcodegen/ftabcodegen.cpp b/rlcodegen/ftabcodegen.cpp
new file mode 100644
index 0000000..2aba78b
--- /dev/null
+++ b/rlcodegen/ftabcodegen.cpp
@@ -0,0 +1,418 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Eric Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlcodegen.h"
+#include "ftabcodegen.h"
+#include "redfsm.h"
+#include "gendata.h"
+
+/* Decide whether transitions are looked up through an extra index table.
+ * The table footprint is estimated both ways and the verdict is stored in
+ * useIndicies (true only when the indexed layout is strictly smaller). */
+void FTabCodeGen::calcIndexSize()
+{
+    int costWith = 0, costWithout = 0;
+
+    /* Per-state part. Every single, range and default transition of a state
+     * takes one slot. With indicies the slot holds an index; without them it
+     * holds the target state, plus the action list id if actions exist. */
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        int slots = st->outSingle.length() + st->outRange.length();
+        if ( st->defTrans != 0 )
+            slots += 1;
+
+        costWith += arrayTypeSize(maxIndex) * slots;
+
+        costWithout += arrayTypeSize(maxState) * slots;
+        if ( anyActions() )
+            costWithout += arrayTypeSize(maxActListId) * slots;
+    }
+
+    /* With indicies the targets (and actions) are stored once per entry of
+     * the transition set rather than once per slot. */
+    costWith += arrayTypeSize(maxState) * redFsm->transSet.length();
+    if ( anyActions() )
+        costWith += arrayTypeSize(maxActListId) * redFsm->transSet.length();
+
+    useIndicies = costWith < costWithout;
+}
+
+/* Write the to-state action entry for a state: the action list id plus one,
+ * or 0 when the state has no to-state action. */
+std::ostream &FTabCodeGen::TO_STATE_ACTION( RedStateAp *state )
+{
+    int listId = ( state->toStateAction != 0 ) ?
+            state->toStateAction->actListId+1 : 0;
+    out << listId;
+    return out;
+}
+
+/* Write the from-state action entry for a state: the action list id plus
+ * one, or 0 when the state has no from-state action. */
+std::ostream &FTabCodeGen::FROM_STATE_ACTION( RedStateAp *state )
+{
+    int listId = ( state->fromStateAction != 0 ) ?
+            state->fromStateAction->actListId+1 : 0;
+    out << listId;
+    return out;
+}
+
+/* Write the EOF action entry for a state: the action list id plus one, or 0
+ * when the state has no EOF action. */
+std::ostream &FTabCodeGen::EOF_ACTION( RedStateAp *state )
+{
+    int listId = ( state->eofAction != 0 ) ?
+            state->eofAction->actListId+1 : 0;
+    out << listId;
+    return out;
+}
+
+
+/* Write the action entry for a transition: the action list id plus one, or 0
+ * when the transition carries no action. */
+std::ostream &FTabCodeGen::TRANS_ACTION( RedTransAp *trans )
+{
+    int listId = ( trans->action != 0 ) ?
+            trans->action->actListId+1 : 0;
+    out << listId;
+    return out;
+}
+
+/* Write the case bodies of the to-state action switch. One case is written
+ * per action list that has to-state references; the label is the list id
+ * plus one. A line directive is written after the cases. */
+std::ostream &FTabCodeGen::TO_STATE_ACTION_SWITCH()
+{
+    for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+        /* Lists never used as a to-state action get no case. */
+        if ( redAct->numToStateRefs <= 0 )
+            continue;
+
+        out << "\tcase " << redAct->actListId+1 << ":\n";
+
+        /* Expand each action of the list, in list order. */
+        for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+            ACTION( out, item->value, 0, false );
+
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write the case bodies of the from-state action switch. One case is written
+ * per action list that has from-state references; the label is the list id
+ * plus one. A line directive is written after the cases. */
+std::ostream &FTabCodeGen::FROM_STATE_ACTION_SWITCH()
+{
+    for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+        /* Lists never used as a from-state action get no case. */
+        if ( redAct->numFromStateRefs <= 0 )
+            continue;
+
+        out << "\tcase " << redAct->actListId+1 << ":\n";
+
+        /* Expand each action of the list, in list order. */
+        for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+            ACTION( out, item->value, 0, false );
+
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write the case bodies of the EOF action switch. One case is written per
+ * action list that has EOF references; the label is the list id plus one.
+ * Unlike the other switches, ACTION() is called with its last argument true
+ * (presumably marking an EOF context; confirm against ACTION()). */
+std::ostream &FTabCodeGen::EOF_ACTION_SWITCH()
+{
+    for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+        /* Lists never used as an EOF action get no case. */
+        if ( redAct->numEofRefs <= 0 )
+            continue;
+
+        out << "\tcase " << redAct->actListId+1 << ":\n";
+
+        /* Expand each action of the list, in list order. */
+        for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+            ACTION( out, item->value, 0, true );
+
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write the case bodies of the transition action switch. One case is written
+ * per action list that has transition references; the label is the list id
+ * plus one. A line directive is written after the cases. */
+std::ostream &FTabCodeGen::ACTION_SWITCH()
+{
+    for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+        /* Lists never used on a transition get no case. */
+        if ( redAct->numTransRefs <= 0 )
+            continue;
+
+        out << "\tcase " << redAct->actListId+1 << ":\n";
+
+        /* Expand each action of the list, in list order. */
+        for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+            ACTION( out, item->value, 0, false );
+
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write the static data section of the generated machine: the lookup arrays
+ * followed by the start, first-final and error state constants. Each array is
+ * written as OPEN_ARRAY(type, name), a filler call, then CLOSE_ARRAY() and a
+ * blank line. The element type of each array is chosen with ARRAY_TYPE() from
+ * the corresponding max* value, or is WIDE_ALPH_TYPE() for key arrays. */
+void FTabCodeGen::writeOutData()
+{
+ /* Condition tables are only written when the machine has conditions. */
+ if ( anyConditions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxCondOffset), CO() );
+ COND_OFFSETS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxCondLen), CL() );
+ COND_LENS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( WIDE_ALPH_TYPE(), CK() );
+ COND_KEYS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxCondSpaceId), C() );
+ COND_SPACES();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ /* Key tables and per-state lengths/offsets are always written. */
+ OPEN_ARRAY( ARRAY_TYPE(maxKeyOffset), KO() );
+ KEY_OFFSETS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( WIDE_ALPH_TYPE(), K() );
+ KEYS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxSingleLen), SL() );
+ SINGLE_LENS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxRangeLen), RL() );
+ RANGE_LENS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxIndexOffset), IO() );
+ INDEX_OFFSETS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ /* The transition tables come in two layouts, selected by useIndicies
+ * (set in calcIndexSize): with an extra index array and the _WI fillers,
+ * or without the index array. */
+ if ( useIndicies ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxIndex), I() );
+ INDICIES();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(maxState), TT() );
+ TRANS_TARGS_WI();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ if ( anyActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActListId), TA() );
+ TRANS_ACTIONS_WI();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+ }
+ else {
+ OPEN_ARRAY( ARRAY_TYPE(maxState), TT() );
+ TRANS_TARGS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ if ( anyActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActListId), TA() );
+ TRANS_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+ }
+
+ /* NOTE(review): the to-state and from-state arrays are typed from
+ * maxActionLoc while the EOF array below uses maxActListId. The entries
+ * this class writes are action list ids plus one (see TO_STATE_ACTION and
+ * friends), so confirm maxActionLoc is the intended bound here. */
+ if ( anyToStateActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), TSA() );
+ TO_STATE_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ if ( anyFromStateActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), FSA() );
+ FROM_STATE_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ if ( anyEofActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActListId), EA() );
+ EOF_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ /* The start state constant is always written. */
+ out <<
+ "static const int " << START() << " = " << START_STATE_ID() << ";\n"
+ "\n";
+
+ /* The first-final and error constants can be switched off through the
+ * writeFirstFinal and writeErr flags of the code generation data. */
+ if ( cgd->writeFirstFinal ) {
+ out <<
+ "static const int " << FIRST_FINAL() << " = " << FIRST_FINAL_STATE() << ";\n"
+ "\n";
+ }
+
+ if ( cgd->writeErr ) {
+ out <<
+ "static const int " << ERROR() << " = " << ERROR_STATE() << ";\n"
+ "\n";
+ }
+}
+
+/* Write the execution loop of the generated machine. The emitted code has the
+ * labels _resume, _match, optionally _again, and optionally _out. Which parts
+ * are written depends on the any*() queries, on useIndicies and on
+ * cgd->hasEnd. outLabelUsed records whether any emitted code jumps to _out,
+ * so that the label is only written when it is referenced. */
+void FTabCodeGen::writeOutExec()
+{
+ outLabelUsed = false;
+
+ /* Local declarations of the emitted block. _ps is only declared when
+ * anyRegCurStateRef() reports that it is needed. */
+ out <<
+ " {\n"
+ " int _klen";
+
+ if ( anyRegCurStateRef() )
+ out << ", _ps";
+
+ out <<
+ ";\n"
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_keys;\n"
+ " int _trans;\n";
+
+ if ( anyConditions() )
+ out << " " << WIDE_ALPH_TYPE() << " _widec;\n";
+
+ out << "\n";
+
+ /* With an end pointer, leave immediately when there is no input. */
+ if ( cgd->hasEnd ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+
+ out << "_resume:\n";
+
+ /* If the machine has an error state, leave the loop once it is entered. */
+ if ( redFsm->errState != 0 ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << CS() << " == " << redFsm->errState->id << " )\n"
+ " goto _out;\n";
+ }
+
+ /* From-state actions are dispatched on the entry for the current state. */
+ if ( anyFromStateActions() ) {
+ out <<
+ " switch ( " << FSA() << "[" << CS() << "] ) {\n";
+ FROM_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ }
+
+ if ( anyConditions() )
+ COND_TRANSLATE();
+
+ LOCATE_TRANS();
+
+ out << "_match:\n";
+
+ /* Remember the state being left, when requested. */
+ if ( anyRegCurStateRef() )
+ out << " _ps = " << CS() << ";\n";
+
+ /* With the indexed layout _trans is first mapped through the index
+ * array before it is used on the target and action arrays. */
+ if ( useIndicies )
+ out << " _trans = " << I() << "[_trans];\n";
+
+ out <<
+ " " << CS() << " = " << TT() << "[_trans];\n"
+ "\n";
+
+ /* Transition actions: an entry of 0 means no action and skips the
+ * switch (TRANS_ACTION writes list id plus one, or 0 for none). */
+ if ( anyRegActions() ) {
+ out <<
+ " if ( " << TA() << "[_trans] == 0 )\n"
+ " goto _again;\n"
+ "\n"
+ " switch ( " << TA() << "[_trans] ) {\n";
+ ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ }
+
+ /* _again is written when the code above jumps to it, or when actions
+ * contain gotos, calls or returns. */
+ if ( anyRegActions() || anyActionGotos() || anyActionCalls() || anyActionRets() )
+ out << "_again:\n";
+
+ /* To-state actions are dispatched on the entry for the new state. */
+ if ( anyToStateActions() ) {
+ out <<
+ " switch ( " << TSA() << "[" << CS() << "] ) {\n";
+ TO_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ }
+
+ /* Advance the input pointer and loop. With an end pointer the loop stops
+ * when the end is reached; without one it loops unconditionally. */
+ if ( cgd->hasEnd ) {
+ out <<
+ " if ( ++" << P() << " != " << PE() << " )\n"
+ " goto _resume;\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n"
+ " goto _resume;\n";
+ }
+
+
+ /* Only write the exit label if something above jumps to it. */
+ if ( outLabelUsed )
+ out << " _out: {}\n";
+
+ out << " }\n";
+}
+
+
+/* Write the EOF handling block: a switch on the EOF action entry of the
+ * current state whose cases come from EOF_ACTION_SWITCH(). Nothing is written
+ * when the machine has no EOF actions. */
+void FTabCodeGen::writeOutEOF()
+{
+ if ( anyEofActions() ) {
+ out <<
+ " {\n"
+ " switch ( " << EA() << "[" << CS() << "] ) {\n";
+ EOF_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+}
diff --git a/rlcodegen/ftabcodegen.h b/rlcodegen/ftabcodegen.h
new file mode 100644
index 0000000..20e4663
--- /dev/null
+++ b/rlcodegen/ftabcodegen.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Eric Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _FTABCODEGEN_H
+#define _FTABCODEGEN_H
+
+#include <iostream>
+#include "tabcodegen.h"
+
+/* Forwards. */
+struct CodeGenData;
+
+
+/*
+ * FTabCodeGen
+ */
+/* Table-style code generator derived from TabCodeGen. Its table entries for
+ * state and transition actions are action list ids plus one (0 meaning no
+ * action), and the emitted switches have one case per action list. */
+class FTabCodeGen : public TabCodeGen
+{
+protected:
+ /* Writers for the case bodies of the four action switches. */
+ std::ostream &TO_STATE_ACTION_SWITCH();
+ std::ostream &FROM_STATE_ACTION_SWITCH();
+ std::ostream &EOF_ACTION_SWITCH();
+ std::ostream &ACTION_SWITCH();
+
+ /* Writers for a single table entry. */
+ virtual std::ostream &TO_STATE_ACTION( RedStateAp *state );
+ virtual std::ostream &FROM_STATE_ACTION( RedStateAp *state );
+ virtual std::ostream &EOF_ACTION( RedStateAp *state );
+ virtual std::ostream &TRANS_ACTION( RedTransAp *trans );
+ /* Chooses between the indexed and non-indexed table layout. */
+ virtual void calcIndexSize();
+ /* Writers for the data, EOF and exec sections of the output. */
+ virtual void writeOutData();
+ virtual void writeOutEOF();
+ virtual void writeOutExec();
+};
+
+
+/*
+ * CFTabCodeGen: FTabCodeGen combined with the C language hooks of CCodeGen.
+ */
+struct CFTabCodeGen
+ : public FTabCodeGen, public CCodeGen
+{
+};
+
+/*
+ * DFTabCodeGen: FTabCodeGen combined with the D language hooks of DCodeGen.
+ */
+struct DFTabCodeGen
+ : public FTabCodeGen, public DCodeGen
+{
+};
+
+#endif /* _FTABCODEGEN_H */
diff --git a/rlcodegen/gendata.cpp b/rlcodegen/gendata.cpp
new file mode 100644
index 0000000..419e903
--- /dev/null
+++ b/rlcodegen/gendata.cpp
@@ -0,0 +1,563 @@
+/*
+ * Copyright 2005-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "gendata.h"
+
+/* Code Generators. */
+#include "gvdotgen.h"
+#include "tabcodegen.h"
+#include "ftabcodegen.h"
+#include "flatcodegen.h"
+#include "fflatcodegen.h"
+#include "gotocodegen.h"
+#include "fgotocodegen.h"
+#include "ipgotocodegen.h"
+#include "splitcodegen.h"
+#include "javacodegen.h"
+
+#include <iostream>
+
+using std::cerr;
+using std::endl;
+
+CodeGenData *cgd = 0;
+
+/* Allocate the reduced machine that the remaining loading calls fill in. */
+void CodeGenData::createMachine()
+{
+ redFsm = new RedFsmAp();
+}
+
+/* Allocate storage for `length` actions and put each element, in index
+ * order, on actionList. */
+void CodeGenData::initActionList( unsigned long length )
+{
+    allActions = new Action[length];
+
+    unsigned long idx = 0;
+    while ( idx < length ) {
+        actionList.append( &allActions[idx] );
+        idx += 1;
+    }
+}
+
+/* Fill in the action stored at index `anum` of allActions: its id, name,
+ * source location (line and column) and inline code list. The slot must
+ * already have been allocated by initActionList(). */
+void CodeGenData::newAction( int anum, char *name, int line,
+        int col, InlineList *inlineList )
+{
+    Action &slot = allActions[anum];
+
+    slot.actionId = anum;
+    slot.name = name;
+    slot.loc.line = line;
+    slot.loc.col = col;
+    slot.inlineList = inlineList;
+}
+
+/* Allocate storage for `length` action tables. Other loading calls refer to
+ * these by index into allActionTables. */
+void CodeGenData::initActionTableList( unsigned long length )
+{
+ allActionTables = new RedAction[length];
+}
+
+/* Allocate storage for `length` states and put each element, in index order,
+ * on the state list of the reduced machine. */
+void CodeGenData::initStateList( unsigned long length )
+{
+    allStates = new RedStateAp[length];
+
+    unsigned long idx = 0;
+    while ( idx < length ) {
+        redFsm->stateList.append( &allStates[idx] );
+        idx += 1;
+    }
+}
+
+/* Record the start state as an index into allStates. finishMachine() later
+ * turns it into a pointer. */
+void CodeGenData::setStartState( unsigned long startState )
+{
+ this->startState = startState;
+}
+
+/* Record a named entry point. The state index and the name are appended to
+ * two parallel vectors, so position i of one corresponds to position i of
+ * the other. */
+void CodeGenData::addEntryPoint( char *name, unsigned long entryState )
+{
+ entryPointIds.append( entryState );
+ entryPointNames.append( name );
+}
+
+/* Called with the number of transitions of state `snum`. Currently a no-op:
+ * both arguments are ignored. */
+void CodeGenData::initTransList( int snum, unsigned long length )
+{
+ /* Could preallocate the out range to save time growing it. For now do
+ * nothing. */
+}
+
+/* Append the transition [lowKey, highKey] to the range list of state `snum`.
+ *
+ *  snum    index of the source state in allStates
+ *  tnum    not used by this function
+ *  targ    index of the target state, or negative for no target; with
+ *          wantComplete a missing target becomes the error state
+ *  action  index into allActionTables, or negative for no action
+ *
+ * When wantComplete is set, any gap between the previous range (or the
+ * minimum key, for the first range) and lowKey is first filled with the
+ * error transition. The gap after the last range is handled by
+ * finishTransList(). The gap detection relies on the last appended range,
+ * so ranges are presumably supplied in ascending key order. */
+void CodeGenData::newTrans( int snum, int tnum, Key lowKey,
+        Key highKey, long targ, long action )
+{
+    /* Get the current state and range. */
+    RedStateAp *curState = allStates + snum;
+    RedTransList &destRange = curState->outRange;
+
+    /* Make the new transition. */
+    RedStateAp *targState = targ >= 0 ? (allStates + targ) :
+            wantComplete ? redFsm->getErrorState() : 0;
+    RedAction *actionTable = action >= 0 ? (allActionTables + action) : 0;
+    RedTransAp *trans = redFsm->allocateTrans( targState, actionTable );
+    RedTransEl transEl( lowKey, highKey, trans );
+
+    if ( wantComplete ) {
+        /* If the machine is to be complete then we need to fill any gaps
+         * with the error transitions. */
+        if ( destRange.length() == 0 ) {
+            /* Range is currently empty. */
+            if ( keyOps->minKey < lowKey ) {
+                /* The first range doesn't start at the low end. */
+                Key fillHighKey = lowKey;
+                fillHighKey.decrement();
+
+                /* Create the filler with the state's error transition. */
+                RedTransEl newTel( keyOps->minKey, fillHighKey, redFsm->getErrorTrans() );
+                destRange.append( newTel );
+            }
+        }
+        else {
+            /* The range list is not empty, get the last range. */
+            RedTransEl *last = &destRange[destRange.length()-1];
+            Key nextKey = last->highKey;
+            nextKey.increment();
+            if ( nextKey < lowKey ) {
+                /* There is a gap to fill. Make the high key. */
+                Key fillHighKey = lowKey;
+                fillHighKey.decrement();
+
+                /* Create the filler with the state's error transition. */
+                RedTransEl newTel( nextKey, fillHighKey, redFsm->getErrorTrans() );
+                destRange.append( newTel );
+            }
+        }
+    }
+
+    /* Filler taken care of. Append the element built above instead of
+     * constructing an identical second one. */
+    destRange.append( transEl );
+}
+
+/* Close the range list of state `snum`. Only does something when a complete
+ * machine is wanted: an empty list is covered with one error transition over
+ * the whole alphabet, otherwise any gap between the last range and the
+ * maximum key is filled with the error transition. */
+void CodeGenData::finishTransList( int snum )
+{
+    RedTransList &ranges = allStates[snum].outRange;
+
+    /* Incomplete machines are left as they are. */
+    if ( !wantComplete )
+        return;
+
+    if ( ranges.length() == 0 ) {
+        /* No ranges at all: cover minKey through maxKey. */
+        RedTransEl wholeAlph( keyOps->minKey, keyOps->maxKey, redFsm->getErrorTrans() );
+        ranges.append( wholeAlph );
+        return;
+    }
+
+    /* Look at the last range and check for a gap at the top end. */
+    RedTransEl *tail = &ranges[ranges.length()-1];
+    if ( tail->highKey < keyOps->maxKey ) {
+        /* The filler starts one past the last covered key. */
+        Key gapLow = tail->highKey;
+        gapLow.increment();
+
+        RedTransEl filler( gapLow, keyOps->maxKey, redFsm->getErrorTrans() );
+        ranges.append( filler );
+    }
+}
+
+/* Mark the state at index `snum` of allStates as final. */
+void CodeGenData::setFinal( int snum )
+{
+    allStates[snum].isFinal = true;
+}
+
+
+/* Attach to-state, from-state and EOF action tables to state `snum`. Each
+ * argument is an index into allActionTables; a negative value leaves the
+ * corresponding pointer of the state untouched. */
+void CodeGenData::setStateActions( int snum, long toStateAction,
+        long fromStateAction, long eofAction )
+{
+    RedStateAp &st = allStates[snum];
+
+    if ( toStateAction >= 0 )
+        st.toStateAction = &allActionTables[toStateAction];
+
+    if ( fromStateAction >= 0 )
+        st.fromStateAction = &allActionTables[fromStateAction];
+
+    if ( eofAction >= 0 )
+        st.eofAction = &allActionTables[eofAction];
+}
+
+/* Walk an inline code list and, for every Goto, Call, Next and Entry item,
+ * turn the stored target id into a pointer into allStates. Child lists are
+ * processed recursively. */
+void CodeGenData::resolveTargetStates( InlineList *inlineList )
+{
+    for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+        /* Only these four item kinds carry a target state. */
+        bool hasTarget =
+                item->type == InlineItem::Goto || item->type == InlineItem::Call ||
+                item->type == InlineItem::Next || item->type == InlineItem::Entry;
+        if ( hasTarget )
+            item->targState = &allStates[item->targId];
+
+        if ( item->children != 0 )
+            resolveTargetStates( item->children );
+    }
+}
+
+
+/* Finish loading the machine: resolve the indices collected during loading
+ * (start state, entry points, targets inside action code) into pointers into
+ * allStates, and for the goto-driven code styles copy each state's condition
+ * list into its condition vector. */
+void CodeGenData::finishMachine()
+{
+ /* The return value is not used; the call is made for its effect on
+ * redFsm (presumably creating the error state; confirm in RedFsmAp). */
+ if ( redFsm->forcedErrorState )
+ redFsm->getErrorState();
+
+ /* We get the start state as an offset, set the pointer now. */
+ redFsm->startState = allStates + startState;
+ for ( EntryIdVect::Iter en = entryPointIds; en.lte(); en++ )
+ redFsm->entryPoints.insert( allStates + *en );
+
+ /* NOTE(review): resolveTargetStates() dereferences its argument, so this
+ * assumes every action was given a non-null inline list; confirm. */
+ for ( ActionList::Iter a = actionList; a.lte(); a++ )
+ resolveTargetStates( a->inlineList );
+
+ /* Note that even if we want a complete graph we do not give the error
+ * state a default transition. All machines break out of the processing
+ * loop when in the error state. */
+
+ if ( codeStyle == GenGoto || codeStyle == GenFGoto || codeStyle == GenIpGoto ) {
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ for ( StateCondList::Iter sci = st->stateCondList; sci.lte(); sci++ )
+ st->stateCondVect.append( sci );
+ }
+ }
+}
+
+
+/* Select the alphabet type. `data` is parsed with atoi() and used as an index
+ * into the host language's type table. Always returns true.
+ *
+ * FIXME: This should validate the alphabet type selection. The index is not
+ * range checked and non-numeric input is treated as index 0 by atoi(). */
+bool CodeGenData::setAlphType( char *data )
+{
+    int typeIndex = atoi(data);
+    thisKeyOps.setAlphType( &hostLang->hostTypes[typeIndex] );
+    return true;
+}
+
+/* Allocate storage for `length` condition spaces and put each element, in
+ * index order, on condSpaceList. */
+void CodeGenData::initCondSpaceList( ulong length )
+{
+    allCondSpaces = new CondSpace[length];
+
+    ulong idx = 0;
+    while ( idx < length ) {
+        condSpaceList.append( &allCondSpaces[idx] );
+        idx += 1;
+    }
+}
+
+/* Fill in the condition space stored at index `cnum` of allCondSpaces with
+ * its id and base key. */
+void CodeGenData::newCondSpace( int cnum, int condSpaceId, Key baseKey )
+{
+    CondSpace &space = allCondSpaces[cnum];
+    space.condSpaceId = condSpaceId;
+    space.baseKey = baseKey;
+}
+
+/* Add the action at index `condActionId` of allActions to the condition set
+ * of condition space `cnum`. */
+void CodeGenData::condSpaceItem( int cnum, long condActionId )
+{
+    allCondSpaces[cnum].condSet.append( &allActions[condActionId] );
+}
+
+/* Called with the number of state conditions of state `snum`. Currently a
+ * no-op: both arguments are ignored. */
+void CodeGenData::initStateCondList( int snum, ulong length )
+{
+ /* Could preallocate these, as we could with transitions. */
+}
+
+/* Append a new state condition covering [lowKey, highKey] to state `snum`.
+ * The condition refers to the condition space at index `condNum` of
+ * allCondSpaces. The StateCond is heap allocated and handed to the state's
+ * condition list. */
+void CodeGenData::addStateCond( int snum, Key lowKey, Key highKey, long condNum )
+{
+    StateCond *cond = new StateCond;
+    cond->lowKey = lowKey;
+    cond->highKey = highKey;
+    cond->condSpace = &allCondSpaces[condNum];
+
+    allStates[snum].stateCondList.append( cond );
+}
+
+
+/* Create the code generator selected by the host language (hostLangType) and
+ * the code style (codeStyle), store it in codeGen, and give it the machine
+ * name and a pointer back to this object.
+ *
+ * NOTE(review): codeGen can still be null after the switch. The C and D
+ * switches have no default case, and the Java default only asserts, which
+ * does nothing when assertions are compiled out. The two assignments at the
+ * end would then dereference a null pointer. */
+void CodeGenData::makeCodeGen()
+{
+ switch ( hostLangType ) {
+ case CCode:
+ switch ( codeStyle ) {
+ case GenTables:
+ codeGen = new CTabCodeGen;
+ break;
+ case GenFTables:
+ codeGen = new CFTabCodeGen;
+ break;
+ case GenFlat:
+ codeGen = new CFlatCodeGen;
+ break;
+ case GenFFlat:
+ codeGen = new CFFlatCodeGen;
+ break;
+ case GenGoto:
+ codeGen = new CGotoCodeGen;
+ break;
+ case GenFGoto:
+ codeGen = new CFGotoCodeGen;
+ break;
+ case GenIpGoto:
+ codeGen = new CIpGotoCodeGen;
+ break;
+ case GenSplit:
+ codeGen = new CSplitCodeGen;
+ break;
+ }
+ break;
+
+ case DCode:
+ switch ( codeStyle ) {
+ case GenTables:
+ codeGen = new DTabCodeGen;
+ break;
+ case GenFTables:
+ codeGen = new DFTabCodeGen;
+ break;
+ case GenFlat:
+ codeGen = new DFlatCodeGen;
+ break;
+ case GenFFlat:
+ codeGen = new DFFlatCodeGen;
+ break;
+ case GenGoto:
+ codeGen = new DGotoCodeGen;
+ break;
+ case GenFGoto:
+ codeGen = new DFGotoCodeGen;
+ break;
+ case GenIpGoto:
+ codeGen = new DIpGotoCodeGen;
+ break;
+ case GenSplit:
+ codeGen = new DSplitCodeGen;
+ break;
+ }
+ break;
+
+ case JavaCode:
+ /* Only the plain table style is available for Java. */
+ switch ( codeStyle ) {
+ case GenTables:
+ codeGen = new JavaTabCodeGen;
+ break;
+ default:
+ assert(false);
+ break;
+ }
+ break;
+ }
+
+ codeGen->fsmName = fsmName;
+ codeGen->cgd = this;
+}
+
+/* Return the first condition space on condSpaceList whose key interval
+ * contains [lowKey, highKey], or 0 if there is none. The interval of a space
+ * starts at its base key and extends by alphSize() * 2^(number of conditions
+ * in its set). */
+CondSpace *CodeGenData::findCondSpace( Key lowKey, Key highKey )
+{
+ for ( CondSpaceList::Iter cs = condSpaceList; cs.lte(); cs++ ) {
+ Key csHighKey = cs->baseKey;
+ csHighKey += keyOps->alphSize() * (1 << cs->condSet.length());
+
+ /* NOTE(review): the upper bound is tested inclusively although
+ * csHighKey is base plus size; confirm this is not off by one. */
+ if ( lowKey >= cs->baseKey && highKey <= csHighKey )
+ return cs;
+ }
+ return 0;
+}
+
+/* Return the first condition on conditionList whose key interval contains
+ * `key`, or 0 if there is none. The interval runs from the condition's base
+ * key to base key + 2^(number of conditions in its set), both inclusive.
+ *
+ * NOTE(review): unlike findCondSpace(), the extent is not multiplied by the
+ * alphabet size; confirm which of the two is intended. */
+Condition *CodeGenData::findCondition( Key key )
+{
+ for ( ConditionList::Iter cond = conditionList; cond.lte(); cond++ ) {
+ Key upperKey = cond->baseKey + (1 << cond->condSet.length());
+ if ( cond->baseKey <= key && key <= upperKey )
+ return cond;
+ }
+ return 0;
+}
+
+/* Return the largest key in use: the alphabet's maximum key, or the high key
+ * of some state's last range if that is larger. Must be called while all
+ * transitions are still in the range lists; the asserts check that no state
+ * has single or default transitions yet. */
+Key CodeGenData::findMaxKey()
+{
+    Key largest = keyOps->maxKey;
+
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        assert( st->outSingle.length() == 0 );
+        assert( st->defTrans == 0 );
+
+        /* States without ranges contribute nothing. */
+        long numRanges = st->outRange.length();
+        if ( numRanges <= 0 )
+            continue;
+
+        /* Only the last range of a state is examined. */
+        Key stateHigh = st->outRange[numRanges-1].highKey;
+        if ( stateHigh > largest )
+            largest = stateHigh;
+    }
+
+    return largest;
+}
+
+/* Prepare the loaded machine for code generation: order and number the
+ * states, choose default and single transitions (or flatten), create the
+ * code generator if there is none yet and let it analyze the machine. Runs at
+ * most once per object; later calls return immediately. Assumes parseData is
+ * set up properly. Called by parser code.
+ *
+ * Note that when errors have been counted the function returns before a code
+ * generator is created, so codeGen may still be null afterwards. */
+void CodeGenData::prepareMachine()
+{
+ if ( hasBeenPrepared )
+ return;
+ hasBeenPrepared = true;
+
+ /* Doing this before distributing transitions out to singles and
+ * defaults makes life easier. */
+ Key maxKey = findMaxKey();
+
+ redFsm->assignActionLocs();
+
+ /* Order the states. */
+ redFsm->depthFirstOrdering();
+
+ if ( codeStyle == GenGoto || codeStyle == GenFGoto ||
+ codeStyle == GenIpGoto || codeStyle == GenSplit )
+ {
+ /* For goto driven machines we can keep the original depth
+ * first ordering because it's ok if the state ids are not
+ * sequential. Split the ids by final state status. */
+ redFsm->sortStateIdsByFinal();
+ }
+ else {
+ /* For table driven machines the location of the state is used to
+ * identify it so the states must be sorted by their final ids.
+ * Though having a deterministic ordering is important,
+ * specifically preserving the depth first ordering is not because
+ * states are stored in tables. */
+ redFsm->sortStatesByFinal();
+ redFsm->sequentialStateIds();
+ }
+
+ /* Find the first final state. This is the final state with the lowest
+ * id. */
+ redFsm->findFirstFinState();
+
+ /* Choose default transitions and the single transition. */
+ redFsm->chooseDefaultSpan();
+
+ /* Maybe do flat expand, otherwise choose single. */
+ if ( codeStyle == GenFlat || codeStyle == GenFFlat )
+ redFsm->makeFlat();
+ else
+ redFsm->chooseSingle();
+
+ /* If any errors have occurred in the input file then don't write anything. */
+ if ( gblErrorCount > 0 )
+ return;
+
+ if ( codeStyle == GenSplit )
+ redFsm->partitionFsm( numSplitPartitions );
+
+ if ( codeStyle == GenIpGoto || codeStyle == GenSplit )
+ redFsm->setInTrans();
+
+ /* Make a code generator that will output the header/code. */
+ if ( codeGen == 0 )
+ makeCodeGen();
+ codeGen->redFsm = redFsm;
+
+ /* Analyze Machine will find the final action reference counts, among
+ * other things. We will use these in reporting the usage
+ * of fsm directives in action code. */
+ codeGen->analyzeMachine();
+ /* The maximum key was computed at the top, before the transitions were
+ * moved out of the range lists. */
+ codeGen->maxKey = maxKey;
+}
+
+/* Write the machine as a Graphviz dot file to *outStream. States are ordered
+ * depth first and numbered sequentially, and default transitions are chosen
+ * before the dot generator is run. */
+void CodeGenData::generateGraphviz()
+{
+ /* Do ordering and choose state ids. */
+ redFsm->depthFirstOrdering();
+ redFsm->sequentialStateIds();
+
+ /* For dot file generation we want to pick default transitions. */
+ redFsm->chooseDefaultSpan();
+
+ /* Make the generator. */
+ GraphvizDotGen dotGen( fsmName, this, redFsm, *outStream );
+
+ /* Write out with it. */
+ dotGen.writeDotFile();
+}
+
+/* Write the requested sections (data, init, exec, EOF) of the generated
+ * code. The write option bits in writeOps are first translated into the
+ * individual flags. If no section is requested, or if no code generator is
+ * available after preparing the machine, nothing is written. */
+void CodeGenData::generateCode()
+{
+    /* Translate the write option bits. */
+    if ( writeOps & WO_NOEND )
+        hasEnd = false;
+
+    if ( writeOps & WO_NOERROR )
+        writeErr = false;
+
+    if ( writeOps & WO_NOPREFIX )
+        dataPrefix = false;
+
+    if ( writeOps & WO_NOFF )
+        writeFirstFinal = false;
+
+    /* Nothing requested, nothing to do. */
+    if ( !( writeData || writeInit || writeExec || writeEOF ) )
+        return;
+
+    prepareMachine();
+
+    /* prepareMachine() returns before creating the code generator when
+     * errors have been counted. Every call below goes through codeGen, so
+     * stop here rather than dereference a null pointer. This also honours
+     * the rule that nothing is written after an error. */
+    if ( codeGen == 0 )
+        return;
+
+    /* Force a newline. */
+    *outStream << "\n";
+    genLineDirective( *outStream );
+
+    if ( writeExec ) {
+        /* Must set labels immediately before writing because we may depend
+         * on the noend write option. */
+        codeGen->setLabelsNeeded();
+    }
+
+    if ( writeData )
+        codeGen->writeOutData();
+
+    if ( writeInit )
+        codeGen->writeOutInit();
+
+    if ( writeExec )
+        codeGen->writeOutExec();
+
+    if ( writeEOF )
+        codeGen->writeOutEOF();
+}
+
+/* Produce output for this machine according to outputFormat. Does nothing
+ * when no machine has been created. The Graphviz output is produced at most
+ * once, guarded by graphvizDone. */
+void CodeGenData::generate()
+{
+    /* No machine, no output. */
+    if ( redFsm == 0 )
+        return;
+
+    if ( outputFormat == OutCode ) {
+        generateCode();
+        return;
+    }
+
+    if ( outputFormat == OutGraphvizDot && !graphvizDone ) {
+        graphvizDone = true;
+        generateGraphviz();
+    }
+}
+
+/* Write a preprocessor line directive of the form
+ *     #line <line> "<fileName>"
+ * to `out`. Nothing is written when the host language is Java.
+ *
+ * The file name is written as a string literal, so backslashes and double
+ * quotes inside it are escaped with a backslash. Without escaping the quote,
+ * a name containing one would end the literal early. */
+void lineDirective( ostream &out, char *fileName, int line )
+{
+    if ( hostLangType != JavaCode ) {
+        /* Write the preprocessor line info pointing to the input file. */
+        out << "#line " << line << " \"";
+        for ( char *pc = fileName; *pc != 0; pc++ ) {
+            if ( *pc == '\\' || *pc == '"' )
+                out << '\\';
+            out << *pc;
+        }
+        out << "\"\n";
+    }
+}
+
+/* Write a line directive that refers to the output file itself, using the
+ * output filter's current line plus one as the line number. */
+void genLineDirective( ostream &out )
+{
+    int nextLine = outFilter->line + 1;
+    lineDirective( out, outputFileName, nextLine );
+}
diff --git a/rlcodegen/gendata.h b/rlcodegen/gendata.h
new file mode 100644
index 0000000..69d4243
--- /dev/null
+++ b/rlcodegen/gendata.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright 2005-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _GENDATA_H
+#define _GENDATA_H
+
+#include <iostream>
+#include "redfsm.h"
+#include "fsmcodegen.h"
+#include "common.h"
+
+struct NameInst;
+typedef DList<Action> ActionList;
+
+typedef unsigned long ulong;
+
+typedef AvlMap<char *, CodeGenData*, CmpStr> CodeGenMap;
+typedef AvlMapEl<char *, CodeGenData*> CodeGenMapEl;
+
+#define WO_NOEND 0x01
+#define WO_NOERROR 0x02
+#define WO_NOPREFIX 0x04
+#define WO_NOFF 0x08
+
+/* Everything needed to generate code for one machine. The loading functions
+ * (initStateList, newTrans, ...) build up the reduced machine from indices;
+ * finishMachine() resolves them to pointers; prepareMachine() and the
+ * generate*() functions then produce the output. */
+struct CodeGenData
+{
+ /* All pointers start out null and all write* section flags false. The
+ * write options hasEnd, dataPrefix, writeFirstFinal and writeErr start
+ * out true and can be cleared later through writeOps. */
+ CodeGenData( char *fileName, char *fsmName, bool wantComplete )
+ :
+ fileName(fileName),
+ fsmName(fsmName),
+ redFsm(0),
+ allActions(0),
+ allActionTables(0),
+ allConditions(0),
+ allCondSpaces(0),
+ allStates(0),
+ nameIndex(0),
+ startState(0),
+ getKeyExpr(0),
+ accessExpr(0),
+ curStateExpr(0),
+ codeGen(0),
+ wantComplete(wantComplete),
+ writeOps(0),
+ writeData(false),
+ writeInit(false),
+ writeExec(false),
+ writeEOF(false),
+ hasLongestMatch(false),
+ hasEnd(true),
+ dataPrefix(true),
+ writeFirstFinal(true),
+ writeErr(true),
+ hasBeenPrepared(false)
+ { }
+
+ /*
+ * Collecting the machine.
+ */
+
+ char *fileName;
+ char *fsmName;
+ /* The reduced machine, allocated by createMachine(). */
+ RedFsmAp *redFsm;
+ /* Arrays allocated by the init*List() functions. The loading functions
+ * refer to their elements by index. */
+ Action *allActions;
+ RedAction *allActionTables;
+ Condition *allConditions;
+ CondSpace *allCondSpaces;
+ RedStateAp *allStates;
+ NameInst **nameIndex;
+ /* Index of the start state in allStates. */
+ int startState;
+ ActionList actionList;
+ ConditionList conditionList;
+ CondSpaceList condSpaceList;
+ InlineList *getKeyExpr;
+ InlineList *accessExpr;
+ InlineList *curStateExpr;
+ /* The code generator; created by makeCodeGen() when still null. */
+ FsmCodeGen *codeGen;
+ KeyOps thisKeyOps;
+ /* When set, gaps in the transition ranges are filled with error
+ * transitions (see newTrans and finishTransList). */
+ bool wantComplete;
+ /* Bit set of the WO_* flags defined above. */
+ int writeOps;
+ /* Which sections generateCode() should write. */
+ bool writeData;
+ bool writeInit;
+ bool writeExec;
+ bool writeEOF;
+ /* Parallel vectors filled by addEntryPoint(). */
+ EntryIdVect entryPointIds;
+ EntryNameVect entryPointNames;
+ bool hasLongestMatch;
+
+ /* Write options. */
+ bool hasEnd;
+ bool dataPrefix;
+ bool writeFirstFinal;
+ bool writeErr;
+
+ /* Loading the machine. */
+ void createMachine();
+ void initActionList( unsigned long length );
+ void newAction( int anum, char *name, int line, int col, InlineList *inlineList );
+ void initActionTableList( unsigned long length );
+ void initStateList( unsigned long length );
+ void setStartState( unsigned long startState );
+ void addEntryPoint( char *name, unsigned long entryState );
+ void setFinal( int snum );
+ void initTransList( int snum, unsigned long length );
+ void newTrans( int snum, int tnum, Key lowKey, Key highKey,
+ long targ, long act );
+ void finishTransList( int snum );
+ void setStateActions( int snum, long toStateAction,
+ long fromStateAction, long eofAction );
+ void finishMachine();
+ /* Requires createMachine() to have been called: redFsm is dereferenced. */
+ void setForcedErrorState()
+ { redFsm->forcedErrorState = true; }
+
+ void initCondSpaceList( ulong length );
+ void condSpaceItem( int cnum, long condActionId );
+ void newCondSpace( int cnum, int condSpaceId, Key baseKey );
+
+ void initStateCondList( int snum, ulong length );
+ void addStateCond( int snum, Key lowKey, Key highKey, long condNum );
+
+ CondSpace *findCondSpace( Key lowKey, Key highKey );
+ Condition *findCondition( Key key );
+
+ bool setAlphType( char *data );
+
+ void makeCodeGen();
+ void generateGraphviz();
+ void resolveTargetStates( InlineList *inlineList );
+ Key findMaxKey();
+
+ /* Producing output. */
+ void generate();
+ void generateCode();
+ void prepareMachine();
+ /* Set by prepareMachine() so that it only runs once. */
+ bool hasBeenPrepared;
+};
+
+extern CodeGenData *cgd;
+
+void lineDirective( ostream &out, char *fileName, int line );
+void genLineDirective( ostream &out );
+
+#endif /* _GENDATA_H */
diff --git a/rlcodegen/gotocodegen.cpp b/rlcodegen/gotocodegen.cpp
new file mode 100644
index 0000000..2b764c1
--- /dev/null
+++ b/rlcodegen/gotocodegen.cpp
@@ -0,0 +1,754 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Eric Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlcodegen.h"
+#include "gotocodegen.h"
+#include "redfsm.h"
+#include "bstmap.h"
+#include "gendata.h"
+
+/* Write an indented "goto tr<id>;" statement that jumps to the label of the
+ * given transition. The output stream is returned so callers can keep
+ * appending to the same line. */
+std::ostream &GotoCodeGen::TRANS_GOTO( RedTransAp *trans, int level )
+{
+    return out << TABS(level) << "goto tr" << trans->id << ";";
+}
+
+/* Write one switch case for every action that is referenced as a to-state
+ * action, followed by a line directive. */
+std::ostream &GotoCodeGen::TO_STATE_ACTION_SWITCH()
+{
+    for ( ActionList::Iter item = cgd->actionList; item.lte(); item++ ) {
+        /* Actions never used as to-state actions get no case. */
+        if ( item->numToStateRefs <= 0 )
+            continue;
+
+        /* Case label, the action code, then the break. */
+        out << "\tcase " << item->actionId << ":\n";
+        ACTION( out, item, 0, false );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write one switch case for every action that is referenced as a from-state
+ * action, followed by a line directive. */
+std::ostream &GotoCodeGen::FROM_STATE_ACTION_SWITCH()
+{
+    for ( ActionList::Iter item = cgd->actionList; item.lte(); item++ ) {
+        /* Actions never used as from-state actions get no case. */
+        if ( item->numFromStateRefs <= 0 )
+            continue;
+
+        /* Case label, the action code, then the break. */
+        out << "\tcase " << item->actionId << ":\n";
+        ACTION( out, item, 0, false );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write one switch case for every action that is referenced as an EOF
+ * action, followed by a line directive. The action code is written with the
+ * in-finish flag set. */
+std::ostream &GotoCodeGen::EOF_ACTION_SWITCH()
+{
+    for ( ActionList::Iter item = cgd->actionList; item.lte(); item++ ) {
+        /* Actions never used as EOF actions get no case. */
+        if ( item->numEofRefs <= 0 )
+            continue;
+
+        /* Case label, the action code, then the break. */
+        out << "\tcase " << item->actionId << ":\n";
+        ACTION( out, item, 0, true );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write one switch case for every action that is referenced from a
+ * transition, followed by a line directive. */
+std::ostream &GotoCodeGen::ACTION_SWITCH()
+{
+    for ( ActionList::Iter item = cgd->actionList; item.lte(); item++ ) {
+        /* Actions never used on a transition get no case. */
+        if ( item->numTransRefs <= 0 )
+            continue;
+
+        /* Case label, the action code, then the break. */
+        out << "\tcase " << item->actionId << ":\n";
+        ACTION( out, item, 0, false );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Open the code for a state by writing its "case <id>:" label. */
+void GotoCodeGen::GOTO_HEADER( RedStateAp *state )
+{
+    out << "case ";
+    out << state->id;
+    out << ":\n";
+}
+
+
+/* Emit the tests for the single-key transitions of a state: a lone key
+ * becomes an if statement, several keys become a switch. A state without
+ * single-key transitions produces no output. */
+void GotoCodeGen::emitSingleSwitch( RedStateAp *state )
+{
+    const int count = state->outSingle.length();
+    RedTransEl *singles = state->outSingle.data;
+
+    /* Nothing to test. */
+    if ( count < 1 )
+        return;
+
+    if ( count == 1 ) {
+        /* Exactly one key: a plain equality test is enough. */
+        out << "\tif ( " << GET_WIDE_KEY(state) << " == " <<
+                KEY(singles[0].lowKey) << " )\n\t\t";
+
+        /* Virtual function for writing the target of the transition. */
+        TRANS_GOTO(singles[0].value, 0) << "\n";
+        return;
+    }
+
+    /* Two or more keys: open a switch and write one case label per key. */
+    out << "\tswitch( " << GET_WIDE_KEY(state) << " ) {\n";
+    for ( int idx = 0; idx != count; ++idx ) {
+        out << "\t\tcase " << KEY(singles[idx].lowKey) << ": ";
+        TRANS_GOTO(singles[idx].value, 0) << "\n";
+    }
+
+    /* Emits a default case for D code. */
+    SWITCH_DEFAULT();
+
+    /* Close off the transition switch. */
+    out << "\t}\n";
+}
+
+/* Emit a nested if/else chain that binary searches the range transitions
+ * of a state, covering the entries low..high (inclusive) of outRange.
+ *
+ * state: state whose outRange list is searched.
+ * level: indentation level passed to TABS for the emitted code.
+ * low, high: inclusive index bounds of the sub-list handled by this call.
+ *
+ * The function recurses on the lower and upper halves around the middle
+ * entry. Comparisons against the alphabet's min/max key are skipped because
+ * they could never fail. */
+void GotoCodeGen::emitRangeBSearch( RedStateAp *state, int level, int low, int high )
+{
+ /* Get the mid position, staying on the lower end of the range. */
+ int mid = (low + high) >> 1;
+ RedTransEl *data = state->outRange.data;
+
+ /* Determine if we need to look higher or lower. */
+ bool anyLower = mid > low;
+ bool anyHigher = mid < high;
+
+ /* Determine if the keys at mid are the limits of the alphabet. */
+ bool limitLow = data[mid].lowKey == keyOps->minKey;
+ bool limitHigh = data[mid].highKey == keyOps->maxKey;
+
+ if ( anyLower && anyHigher ) {
+ /* Can go lower and higher than mid. */
+ out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " <<
+ KEY(data[mid].lowKey) << " ) {\n";
+ emitRangeBSearch( state, level+1, low, mid-1 );
+ out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " > " <<
+ KEY(data[mid].highKey) << " ) {\n";
+ emitRangeBSearch( state, level+1, mid+1, high );
+ out << TABS(level) << "} else\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else if ( anyLower && !anyHigher ) {
+ /* Can go lower than mid but not higher. */
+ out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " <<
+ KEY(data[mid].lowKey) << " ) {\n";
+ emitRangeBSearch( state, level+1, low, mid-1 );
+
+ /* If the higher end is the highest in the alphabet then there is no
+ * sense testing it. */
+ if ( limitHigh ) {
+ out << TABS(level) << "} else\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else {
+ out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " <= " <<
+ KEY(data[mid].highKey) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ }
+ else if ( !anyLower && anyHigher ) {
+ /* Can go higher than mid but not lower. */
+ out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " > " <<
+ KEY(data[mid].highKey) << " ) {\n";
+ emitRangeBSearch( state, level+1, mid+1, high );
+
+ /* If the lower end is the lowest in the alphabet then there is no
+ * sense testing it. */
+ if ( limitLow ) {
+ out << TABS(level) << "} else\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else {
+ out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " >= " <<
+ KEY(data[mid].lowKey) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ }
+ else {
+ /* Cannot go higher or lower than mid. It's mid or bust. What
+ * tests to do depends on limits of alphabet. */
+ if ( !limitLow && !limitHigh ) {
+ out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " <<
+ GET_WIDE_KEY(state) << " && " << GET_WIDE_KEY(state) << " <= " <<
+ KEY(data[mid].highKey) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else if ( limitLow && !limitHigh ) {
+ out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " <= " <<
+ KEY(data[mid].highKey) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else if ( !limitLow && limitHigh ) {
+ out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " <<
+ GET_WIDE_KEY(state) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else {
+ /* Both high and low are at the limit. No tests to do. */
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ }
+}
+
+void GotoCodeGen::STATE_GOTO_ERROR()
+{
+ /* Label the state and bail immediately. */
+ outLabelUsed = true;
+ RedStateAp *state = redFsm->errState;
+ out << "case " << state->id << ":\n";
+ out << " goto _out;\n";
+}
+
+/* Emit the code that maps the current key into the key range of a condition
+ * space. _widec is first set to the space's base key plus the key's offset
+ * from the alphabet minimum. Each condition of the space then gets a test
+ * that, when true, adds (1 << position) * alphabet size to _widec. */
+void GotoCodeGen::COND_TRANSLATE( StateCond *stateCond, int level )
+{
+    CondSpace *space = stateCond->condSpace;
+
+    /* Base translation into the condition space. */
+    out << TABS(level) << "_widec = " << CAST(WIDE_ALPH_TYPE()) << "(" <<
+            KEY(space->baseKey) << " + (" << GET_KEY() <<
+            " - " << KEY(keyOps->minKey) << "));\n";
+
+    /* One guarded offset per condition in the space. */
+    CondSet::Iter cond = space->condSet;
+    while ( cond.lte() ) {
+        out << TABS(level) << "if ( ";
+        CONDITION( out, *cond );
+        Size offset = ((1 << cond.pos()) * keyOps->alphSize());
+        out << " ) _widec += " << offset << ";\n";
+        cond++;
+    }
+}
+
+/* Emit a nested if/else chain that binary searches the condition ranges of
+ * a state (entries low..high, inclusive, of stateCondVect) and, for the
+ * range containing the current key, emits the translation into the
+ * condition space via COND_TRANSLATE.
+ *
+ * state: state whose stateCondVect is searched.
+ * level: indentation level passed to TABS for the emitted code.
+ * low, high: inclusive index bounds of the sub-list handled by this call.
+ *
+ * Comparisons against the alphabet's min/max key are skipped because they
+ * could never fail. Every guarded translation is wrapped in a brace pair
+ * that is opened at the end of the line carrying the test. */
+void GotoCodeGen::emitCondBSearch( RedStateAp *state, int level, int low, int high )
+{
+    /* Get the mid position, staying on the lower end of the range. */
+    int mid = (low + high) >> 1;
+    StateCond **data = state->stateCondVect.data;
+
+    /* Determine if we need to look higher or lower. */
+    bool anyLower = mid > low;
+    bool anyHigher = mid < high;
+
+    /* Determine if the keys at mid are the limits of the alphabet. */
+    bool limitLow = data[mid]->lowKey == keyOps->minKey;
+    bool limitHigh = data[mid]->highKey == keyOps->maxKey;
+
+    if ( anyLower && anyHigher ) {
+        /* Can go lower and higher than mid. */
+        out << TABS(level) << "if ( " << GET_KEY() << " < " <<
+                KEY(data[mid]->lowKey) << " ) {\n";
+        emitCondBSearch( state, level+1, low, mid-1 );
+        out << TABS(level) << "} else if ( " << GET_KEY() << " > " <<
+                KEY(data[mid]->highKey) << " ) {\n";
+        emitCondBSearch( state, level+1, mid+1, high );
+        out << TABS(level) << "} else {\n";
+        COND_TRANSLATE(data[mid], level+1);
+        out << TABS(level) << "}\n";
+    }
+    else if ( anyLower && !anyHigher ) {
+        /* Can go lower than mid but not higher. */
+        out << TABS(level) << "if ( " << GET_KEY() << " < " <<
+                KEY(data[mid]->lowKey) << " ) {\n";
+        emitCondBSearch( state, level+1, low, mid-1 );
+
+        /* If the higher end is the highest in the alphabet then there is
+         * no sense testing it. */
+        if ( limitHigh ) {
+            out << TABS(level) << "} else {\n";
+            COND_TRANSLATE(data[mid], level+1);
+            out << TABS(level) << "}\n";
+        }
+        else {
+            out << TABS(level) << "} else if ( " << GET_KEY() << " <= " <<
+                    KEY(data[mid]->highKey) << " ) {\n";
+            COND_TRANSLATE(data[mid], level+1);
+            out << TABS(level) << "}\n";
+        }
+    }
+    else if ( !anyLower && anyHigher ) {
+        /* Can go higher than mid but not lower. */
+        out << TABS(level) << "if ( " << GET_KEY() << " > " <<
+                KEY(data[mid]->highKey) << " ) {\n";
+        emitCondBSearch( state, level+1, mid+1, high );
+
+        /* If the lower end is the lowest in the alphabet then there is no
+         * sense testing it. */
+        if ( limitLow ) {
+            out << TABS(level) << "} else {\n";
+            COND_TRANSLATE(data[mid], level+1);
+            out << TABS(level) << "}\n";
+        }
+        else {
+            out << TABS(level) << "} else if ( " << GET_KEY() << " >= " <<
+                    KEY(data[mid]->lowKey) << " ) {\n";
+            COND_TRANSLATE(data[mid], level+1);
+            out << TABS(level) << "}\n";
+        }
+    }
+    else {
+        /* Cannot go higher or lower than mid. It's mid or bust. What
+         * tests to do depends on limits of alphabet. */
+        if ( !limitLow && !limitHigh ) {
+            out << TABS(level) << "if ( " << KEY(data[mid]->lowKey) << " <= " <<
+                    GET_KEY() << " && " << GET_KEY() << " <= " <<
+                    KEY(data[mid]->highKey) << " ) {\n";
+            COND_TRANSLATE(data[mid], level+1);
+            out << TABS(level) << "}\n";
+        }
+        else if ( limitLow && !limitHigh ) {
+            out << TABS(level) << "if ( " << GET_KEY() << " <= " <<
+                    KEY(data[mid]->highKey) << " ) {\n";
+            COND_TRANSLATE(data[mid], level+1);
+            out << TABS(level) << "}\n";
+        }
+        else if ( !limitLow && limitHigh ) {
+            /* The brace is opened on the test line, as in every other
+             * branch, so the translated statements start on a fresh line. */
+            out << TABS(level) << "if ( " << KEY(data[mid]->lowKey) << " <= " <<
+                    GET_KEY() << " ) {\n";
+            COND_TRANSLATE(data[mid], level+1);
+            out << TABS(level) << "}\n";
+        }
+        else {
+            /* Both high and low are at the limit. No tests to do, so the
+             * translation is written unguarded at the current level. */
+            COND_TRANSLATE(data[mid], level);
+        }
+    }
+}
+
+/* Write the body of the state switch: one case per state in the state list.
+ * The error state gets a bare jump out. Every other state gets its header,
+ * the optional condition translation, the single-key tests, the range
+ * search and finally the default transition. */
+std::ostream &GotoCodeGen::STATE_GOTOS()
+{
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ if ( st == redFsm->errState )
+ STATE_GOTO_ERROR();
+ else {
+ /* Writing code above state gotos. */
+ GOTO_HEADER( st );
+
+ /* States with conditions first load _widec with the plain key and
+ * then emit the search that translates it into a condition space. */
+ if ( st->stateCondVect.length() > 0 ) {
+ out << " _widec = " << GET_KEY() << ";\n";
+ emitCondBSearch( st, 1, 0, st->stateCondVect.length() - 1 );
+ }
+
+ /* Try singles. */
+ if ( st->outSingle.length() > 0 )
+ emitSingleSwitch( st );
+
+ /* Then binary search the ranges. If no test matches, the generated
+ * code falls through to the default transition written below. */
+ if ( st->outRange.length() > 0 )
+ emitRangeBSearch( st, 1, 0, st->outRange.length() - 1 );
+
+ /* Write the default transition. */
+ TRANS_GOTO( st->defTrans, 1 ) << "\n";
+ }
+ }
+ return out;
+}
+
+/* Write the landing code for every transition in the transition set: a
+ * "tr<id>:" label, the assignment of the target state and a jump either to
+ * the transition's action list or straight to _again. */
+std::ostream &GotoCodeGen::TRANSITIONS()
+{
+    TransApSet::Iter trans = redFsm->transSet;
+    while ( trans.lte() ) {
+        /* Label that the state code jumps to. */
+        out << " tr" << trans->id << ": ";
+
+        /* Keep the previous state around when the action refers to it. */
+        if ( trans->action != 0 && trans->action->anyCurStateRef() )
+            out << "_ps = " << CS() << ";";
+
+        /* Move to the destination state. */
+        out << CS() << " = " << trans->targ->id << "; ";
+
+        if ( trans->action == 0 ) {
+            /* No code to execute, just loop around. */
+            out << "goto _again;\n";
+        }
+        else {
+            /* Hand over to the code that runs the action list. */
+            out << "goto f" << trans->action->actListId << ";\n";
+        }
+
+        trans++;
+    }
+    return out;
+}
+
+/* Write the action dispatch code. Each action list that is referenced from
+ * a transition gets an "f<id>:" label that points _acts at the list and
+ * jumps to execFuncs. The shared execFuncs loop then reads the action count
+ * and runs each action through a switch before jumping to _again. */
+std::ostream &GotoCodeGen::EXEC_FUNCS()
+{
+ /* Make labels that set acts and jump to execFuncs. Loop func indices. */
+ for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+ if ( redAct->numTransRefs > 0 ) {
+ out << " f" << redAct->actListId << ": " <<
+ "_acts = " << ARR_OFF(A(), itoa( redAct->location+1 ) ) << ";"
+ " goto execFuncs;\n";
+ }
+ }
+
+ /* The shared loop. ACTION_SWITCH and SWITCH_DEFAULT write the switch body
+ * between the opening text and the closing braces. */
+ out <<
+ "\n"
+ "execFuncs:\n"
+ " _nacts = *_acts++;\n"
+ " while ( _nacts-- > 0 ) {\n"
+ " switch ( *_acts++ ) {\n";
+ ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ " goto _again;\n";
+ return out;
+}
+
+/* Return the table value for the to-state action of a state: the action
+ * list's location plus one, or zero when the state has no such action. */
+unsigned int GotoCodeGen::TO_STATE_ACTION( RedStateAp *state )
+{
+    if ( state->toStateAction == 0 )
+        return 0;
+    return state->toStateAction->location+1;
+}
+
+/* Return the table value for the from-state action of a state: the action
+ * list's location plus one, or zero when the state has no such action. */
+unsigned int GotoCodeGen::FROM_STATE_ACTION( RedStateAp *state )
+{
+    if ( state->fromStateAction == 0 )
+        return 0;
+    return state->fromStateAction->location+1;
+}
+
+/* Return the table value for the EOF action of a state: the action list's
+ * location plus one, or zero when the state has no such action. */
+unsigned int GotoCodeGen::EOF_ACTION( RedStateAp *state )
+{
+    if ( state->eofAction == 0 )
+        return 0;
+    return state->eofAction->location+1;
+}
+
+/* Write the comma separated to-state action table, one value per state id
+ * in [0, nextStateId), with a line break after every IALL values.
+ *
+ * The table is filled through the state list but read back by state id, so
+ * the buffer is sized to cover both counts and the read loop and separator
+ * test share a single bound. */
+std::ostream &GotoCodeGen::TO_STATE_ACTIONS()
+{
+    int numIds = redFsm->nextStateId;
+    int numStates = redFsm->stateList.length();
+    int numVals = numIds > numStates ? numIds : numStates;
+
+    /* Ids without a state in the list keep the value zero. */
+    unsigned int *vals = new unsigned int[numVals];
+    memset( vals, 0, sizeof(unsigned int)*numVals );
+
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+        vals[st->id] = TO_STATE_ACTION(st);
+
+    out << "\t";
+    for ( int st = 0; st < numIds; st++ ) {
+        /* Write the to-state action of this state id. */
+        out << vals[st];
+        if ( st < numIds-1 ) {
+            out << ", ";
+            if ( (st+1) % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+    out << "\n";
+    delete[] vals;
+    return out;
+}
+
+/* Write the comma separated from-state action table, one value per state
+ * id in [0, nextStateId), with a line break after every IALL values.
+ *
+ * The table is filled through the state list but read back by state id, so
+ * the buffer is sized to cover both counts and the read loop and separator
+ * test share a single bound. */
+std::ostream &GotoCodeGen::FROM_STATE_ACTIONS()
+{
+    int numIds = redFsm->nextStateId;
+    int numStates = redFsm->stateList.length();
+    int numVals = numIds > numStates ? numIds : numStates;
+
+    /* Ids without a state in the list keep the value zero. */
+    unsigned int *vals = new unsigned int[numVals];
+    memset( vals, 0, sizeof(unsigned int)*numVals );
+
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+        vals[st->id] = FROM_STATE_ACTION(st);
+
+    out << "\t";
+    for ( int st = 0; st < numIds; st++ ) {
+        /* Write the from-state action of this state id. */
+        out << vals[st];
+        if ( st < numIds-1 ) {
+            out << ", ";
+            if ( (st+1) % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+    out << "\n";
+    delete[] vals;
+    return out;
+}
+
+/* Write the comma separated EOF action table, one value per state id in
+ * [0, nextStateId), with a line break after every IALL values.
+ *
+ * The table is filled through the state list but read back by state id, so
+ * the buffer is sized to cover both counts and the read loop and separator
+ * test share a single bound. */
+std::ostream &GotoCodeGen::EOF_ACTIONS()
+{
+    int numIds = redFsm->nextStateId;
+    int numStates = redFsm->stateList.length();
+    int numVals = numIds > numStates ? numIds : numStates;
+
+    /* Ids without a state in the list keep the value zero. */
+    unsigned int *vals = new unsigned int[numVals];
+    memset( vals, 0, sizeof(unsigned int)*numVals );
+
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+        vals[st->id] = EOF_ACTION(st);
+
+    out << "\t";
+    for ( int st = 0; st < numIds; st++ ) {
+        /* Write the eof action of this state id. */
+        out << vals[st];
+        if ( st < numIds-1 ) {
+            out << ", ";
+            if ( (st+1) % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+    out << "\n";
+    delete[] vals;
+    return out;
+}
+
+/* Write one "case <state>: goto f<actions>;" line for every state that has
+ * an EOF action. */
+std::ostream &GotoCodeGen::FINISH_CASES()
+{
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Only states with an EOF action need a case. */
+        if ( st->eofAction == 0 )
+            continue;
+
+        /* The case label followed by the jump to the action list. */
+        out << "\t\tcase " << st->id << ": goto f" <<
+                st->eofAction->actListId << ";\n";
+    }
+
+    return out;
+}
+
+/* Write the code for a goto statement to a fixed state: assign the
+ * destination to the current state and jump to _again. */
+void GotoCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish )
+{
+    ret << "{" << CS() << " = " << gotoDest << "; ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write the code for a goto statement whose destination is an expression:
+ * the inline item's children are written as the value assigned to the
+ * current state, followed by a jump to _again. */
+void GotoCodeGen::GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+    /* Open the block and the parenthesised destination expression. */
+    ret << "{";
+    ret << CS() << " = (";
+
+    INLINE_LIST( ret, ilItem->children, 0, inFinish );
+
+    /* Close the expression and loop around. */
+    ret << "); ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write the expression for the current state as seen from an action: the
+ * saved _ps variable. */
+void GotoCodeGen::CURS( ostream &ret, bool inFinish )
+{
+    static const char savedState[] = "(_ps)";
+    ret << savedState;
+}
+
+/* Write the expression for the target state as seen from an action: the
+ * current state variable in parentheses. */
+void GotoCodeGen::TARGS( ostream &ret, bool inFinish, int targState )
+{
+    ret << "(";
+    ret << CS();
+    ret << ")";
+}
+
+/* Write the code for a next statement to a fixed state: assign the
+ * destination to the current state without jumping. */
+void GotoCodeGen::NEXT( ostream &ret, int nextDest, bool inFinish )
+{
+    ret << CS() << " = ";
+    ret << nextDest << ";";
+}
+
+/* Write the code for a next statement whose destination is an expression:
+ * the inline item's children are written as the value assigned to the
+ * current state. */
+void GotoCodeGen::NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+    /* Open the assignment and the parenthesised expression. */
+    ret << CS();
+    ret << " = (";
+
+    INLINE_LIST( ret, ilItem->children, 0, inFinish );
+
+    /* Close the expression and the statement. */
+    ret << ");";
+}
+
+/* Write the code for a call statement to a fixed state: push the current
+ * state on the stack, assign the destination and jump to _again. */
+void GotoCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish )
+{
+    /* Push the current state. */
+    ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; ";
+
+    /* Enter the destination state. */
+    ret << CS() << " = " << callDest << "; ";
+
+    /* Loop around. */
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write the code for a call statement whose destination is an expression:
+ * push the current state on the stack, assign the value written from the
+ * inline item's children and jump to _again. */
+void GotoCodeGen::CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish )
+{
+    /* Push the current state. */
+    ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; ";
+
+    /* Assign the parenthesised destination expression. */
+    ret << CS() << " = (";
+    INLINE_LIST( ret, ilItem->children, targState, inFinish );
+    ret << "); ";
+
+    /* Loop around. */
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write the code for a return statement: pop the state saved on the stack
+ * into the current state and jump to _again. */
+void GotoCodeGen::RET( ostream &ret, bool inFinish )
+{
+    /* Pop the saved state. */
+    ret << "{" << CS() << " = " << STACK() << "[--" << TOP() << "]; ";
+
+    /* Loop around. */
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write the code for a break statement: a jump to the _out label. */
+void GotoCodeGen::BREAK( ostream &ret, int targState )
+{
+    /* The emitted jump targets _out, so record that the label is needed. */
+    outLabelUsed = true;
+
+    ret << CTRL_FLOW();
+    ret << "goto _out;";
+}
+
+/* Write the static data of the generated machine: the start state constant,
+ * the optional first-final and error state constants, and the action tables
+ * that the machine actually uses. */
+void GotoCodeGen::writeOutData()
+{
+ /* The start state is always written. */
+ out <<
+ "static const int " << START() << " = " << START_STATE_ID() << ";\n"
+ "\n";
+
+ /* First final and error states are written only on request. */
+ if ( cgd->writeFirstFinal ) {
+ out <<
+ "static const int " << FIRST_FINAL() << " = " << FIRST_FINAL_STATE() << ";\n"
+ "\n";
+ }
+
+ if ( cgd->writeErr ) {
+ out <<
+ "static const int " << ERROR() << " = " << ERROR_STATE() << ";\n"
+ "\n";
+ }
+
+ /* The array of action lists. */
+ if ( anyActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActArrItem), A() );
+ ACTIONS_ARRAY();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ /* Per-state tables, each written only when some state uses them. */
+ if ( anyToStateActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), TSA() );
+ TO_STATE_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ if ( anyFromStateActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), FSA() );
+ FROM_STATE_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ if ( anyEofActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), EA() );
+ EOF_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+}
+
+/* Write the execution block of the generated machine. The emitted code has
+ * this shape: local declarations, an optional end-of-input check, the
+ * _resume label with the from-state actions, the state switch, the
+ * transition labels, the action dispatch code, the _again label with the
+ * to-state actions and the pointer advance, and finally the _out label when
+ * something jumps to it. */
+void GotoCodeGen::writeOutExec()
+{
+ /* Set to true by any emitter that writes a jump to _out. */
+ outLabelUsed = false;
+
+ out << " {\n";
+
+ /* Locals of the generated block, declared only when needed. */
+ if ( anyRegCurStateRef() )
+ out << " int _ps = 0;\n";
+
+ if ( anyToStateActions() || anyRegActions() || anyFromStateActions() ) {
+ out <<
+ " " << PTR_CONST() << ARRAY_TYPE(maxActArrItem) << POINTER() << "_acts;\n"
+ " " << UINT() << " _nacts;\n";
+ }
+
+ if ( anyConditions() )
+ out << " " << WIDE_ALPH_TYPE() << " _widec;\n";
+
+ out << "\n";
+
+ /* With an end pointer, bail out right away on empty input. */
+ if ( cgd->hasEnd ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+
+ out << "_resume:\n";
+
+ /* Run the from-state actions of the current state. */
+ if ( anyFromStateActions() ) {
+ out <<
+ " _acts = " << ARR_OFF( A(), FSA() + "[" + CS() + "]" ) << ";\n"
+ " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+ " while ( _nacts-- > 0 ) {\n"
+ " switch ( *_acts++ ) {\n";
+ FROM_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+
+ /* The state switch followed by the transition labels it jumps to. */
+ out <<
+ " switch ( " << CS() << " ) {\n";
+ STATE_GOTOS();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ TRANSITIONS() <<
+ "\n";
+
+ if ( anyRegActions() )
+ EXEC_FUNCS() << "\n";
+
+ out << "_again:\n";
+
+ /* Run the to-state actions of the state just entered. */
+ if ( anyToStateActions() ) {
+ out <<
+ " _acts = " << ARR_OFF( A(), TSA() + "[" + CS() + "]" ) << ";\n"
+ " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+ " while ( _nacts-- > 0 ) {\n"
+ " switch ( *_acts++ ) {\n";
+ TO_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+
+ /* Advance the input pointer. With an end pointer the loop stops at the
+ * end, otherwise it resumes unconditionally. */
+ if ( cgd->hasEnd ) {
+ out <<
+ " if ( ++" << P() << " != " << PE() << " )\n"
+ " goto _resume;\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n"
+ " goto _resume;\n";
+ }
+
+ /* Only write the label when some emitted code jumps to it. */
+ if ( outLabelUsed )
+ out << " _out: {}\n";
+
+ out << " }\n";
+}
+
+/* Write the EOF block of the generated machine: a loop that runs the EOF
+ * actions of the current state. Nothing is written when no state has an
+ * EOF action. */
+void GotoCodeGen::writeOutEOF()
+{
+ if ( anyEofActions() ) {
+ /* Point _acts at the current state's EOF action list, read the count
+ * and run each action through the switch. */
+ out <<
+ " {\n"
+ " " << PTR_CONST() << ARRAY_TYPE(maxActArrItem) << POINTER() << "_acts = " <<
+ ARR_OFF( A(), EA() + "[" + CS() + "]" ) << ";\n"
+ " " << UINT() << " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+ " while ( _nacts-- > 0 ) {\n"
+ " switch ( *_acts++ ) {\n";
+ EOF_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ " }\n"
+ "\n";
+ }
+}
diff --git a/rlcodegen/gotocodegen.h b/rlcodegen/gotocodegen.h
new file mode 100644
index 0000000..352e63e
--- /dev/null
+++ b/rlcodegen/gotocodegen.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Eric Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _GOTOCODEGEN_H
+#define _GOTOCODEGEN_H
+
+#include <iostream>
+#include "fsmcodegen.h"
+
+/* Forwards. */
+struct CodeGenData;
+struct NameInst;
+struct RedTransAp;
+struct RedStateAp;
+struct StateCond;
+
+/*
+ * Goto driven fsm.
+ *
+ * Code generator that writes the machine as a switch over the current state
+ * with goto statements between states, transitions and action lists.
+ */
+class GotoCodeGen : virtual public FsmCodeGen
+{
+public:
+ /* Writers for the switch cases of each kind of action. */
+ std::ostream &TO_STATE_ACTION_SWITCH();
+ std::ostream &FROM_STATE_ACTION_SWITCH();
+ std::ostream &EOF_ACTION_SWITCH();
+ std::ostream &ACTION_SWITCH();
+
+ /* Writers for the state cases, the transition labels, the action
+ * dispatch code and the per-state EOF cases. */
+ std::ostream &STATE_GOTOS();
+ std::ostream &TRANSITIONS();
+ std::ostream &EXEC_FUNCS();
+ std::ostream &FINISH_CASES();
+
+ /* Writers for the control flow statements that appear inside actions. */
+ void GOTO( ostream &ret, int gotoDest, bool inFinish );
+ void CALL( ostream &ret, int callDest, int targState, bool inFinish );
+ void NEXT( ostream &ret, int nextDest, bool inFinish );
+ void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+ void NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+ void CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish );
+ void CURS( ostream &ret, bool inFinish );
+ void TARGS( ostream &ret, bool inFinish, int targState );
+ void RET( ostream &ret, bool inFinish );
+ void BREAK( ostream &ret, int targState );
+
+ /* Table value for a state's action of each kind; zero means none. */
+ virtual unsigned int TO_STATE_ACTION( RedStateAp *state );
+ virtual unsigned int FROM_STATE_ACTION( RedStateAp *state );
+ virtual unsigned int EOF_ACTION( RedStateAp *state );
+
+ /* Writers for the per-state action tables. */
+ std::ostream &TO_STATE_ACTIONS();
+ std::ostream &FROM_STATE_ACTIONS();
+ std::ostream &EOF_ACTIONS();
+
+ /* Condition handling.
+ * NOTE(review): STATE_CONDS has no definition in gotocodegen.cpp as
+ * added by this change -- confirm whether it is defined elsewhere. */
+ void COND_TRANSLATE( StateCond *stateCond, int level );
+ void emitCondBSearch( RedStateAp *state, int level, int low, int high );
+ void STATE_CONDS( RedStateAp *state, bool genDefault );
+
+ /* Writes the jump taken for a transition. */
+ virtual std::ostream &TRANS_GOTO( RedTransAp *trans, int level );
+
+ /* Writers for the key tests of a state. */
+ void emitSingleSwitch( RedStateAp *state );
+ void emitRangeBSearch( RedStateAp *state, int level, int low, int high );
+
+ /* Called from STATE_GOTOS just before writing the gotos */
+ virtual void GOTO_HEADER( RedStateAp *state );
+ virtual void STATE_GOTO_ERROR();
+
+ /* Entry points that write the data, EOF and execution sections. */
+ virtual void writeOutData();
+ virtual void writeOutEOF();
+ virtual void writeOutExec();
+};
+
+/*
+ * class CGotoCodeGen
+ *
+ * The goto driven generator combined with CCodeGen.
+ * NOTE(review): presumably the variant used for C output -- confirm against
+ * CCodeGen.
+ */
+struct CGotoCodeGen
+ : public GotoCodeGen, public CCodeGen
+{
+};
+
+/*
+ * class DGotoCodeGen
+ *
+ * The goto driven generator combined with DCodeGen.
+ * NOTE(review): presumably the variant used for D output -- confirm against
+ * DCodeGen.
+ */
+struct DGotoCodeGen
+ : public GotoCodeGen, public DCodeGen
+{
+};
+
+
+#endif /* _GOTOCODEGEN_H */
diff --git a/rlcodegen/gvdotgen.cpp b/rlcodegen/gvdotgen.cpp
new file mode 100644
index 0000000..6ea4a09
--- /dev/null
+++ b/rlcodegen/gvdotgen.cpp
@@ -0,0 +1,282 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include "rlcodegen.h"
+#include "gvdotgen.h"
+#include "gendata.h"
+#include "redfsm.h"
+
+using namespace std;
+
+/* Store the machine name, the code generation data, the reduced machine and
+ * the output stream that the dot file is written to. Nothing is written
+ * here. */
+GraphvizDotGen::GraphvizDotGen( char *fsmName, CodeGenData *cgd,
+ RedFsmAp *redFsm, ostream &out )
+:
+ fsmName(fsmName),
+ cgd(cgd),
+ redFsm(redFsm),
+ out(out)
+{
+}
+
+/* Write a key for use in an edge label. Printable keys are written as a
+ * quoted character when printPrintables is set; all other keys are written
+ * as a number, signed or unsigned according to the alphabet. */
+std::ostream &GraphvizDotGen::KEY( Key key )
+{
+    /* Numeric form. */
+    if ( !printPrintables || !key.isPrintable() ) {
+        if ( keyOps->isSigned )
+            out << key.getVal();
+        else
+            out << (unsigned long) key.getVal();
+        return out;
+    }
+
+    /* Character form. The double quote and the backslash are escaped with a
+     * leading backslash. */
+    char ch = (char) key.getVal();
+    out << "'";
+    if ( ch == '"' || ch == '\\' )
+        out << "\\";
+    out << ch;
+    out << "'";
+
+    return out;
+}
+
+/* Write the action part of an edge label: the names of the source state's
+ * from-state actions, the transition's actions and the target state's
+ * to-state actions, in that order, after a " / " separator. Nothing is
+ * written when none of the three is present. */
+std::ostream &GraphvizDotGen::TRANS_ACTION( RedStateAp *fromState, RedTransAp *trans )
+{
+    RedAction *pending[3];
+    int count = 0;
+
+    /* Collect the action tables that exist. */
+    if ( fromState->fromStateAction != 0 )
+        pending[count++] = fromState->fromStateAction;
+    if ( trans->action != 0 )
+        pending[count++] = trans->action;
+    if ( trans->targ != 0 && trans->targ->toStateAction != 0 )
+        pending[count++] = trans->targ->toStateAction;
+
+    if ( count == 0 )
+        return out;
+
+    out << " / ";
+
+    /* Write every action name, comma separated. The separator is left off
+     * only after the last action of the last table. */
+    for ( int idx = 0; idx < count; idx++ ) {
+        const bool lastTable = ( idx == count-1 );
+        for ( ActionTable::Iter el = pending[idx]->key.first(); el.lte(); el++ ) {
+            out << el->value->nameOrLoc();
+            if ( !( lastTable && el.last() ) )
+                out << ", ";
+        }
+    }
+    return out;
+}
+
+/* Write the action part of a label for a single action table: a " / "
+ * separator followed by the comma separated actions. Each action is written
+ * by name, or as line:col when it has no name. */
+std::ostream &GraphvizDotGen::ACTION( RedAction *action )
+{
+    out << " / ";
+
+    ActionTable::Iter el = action->key.first();
+    while ( el.lte() ) {
+        Action *item = el->value;
+
+        if ( item->name == 0 )
+            out << item->loc.line << ":" << item->loc.col;
+        else
+            out << item->name;
+
+        /* Separate from the next action, if any. */
+        if ( !el.last() )
+            out << ", ";
+
+        el++;
+    }
+    return out;
+}
+
+/* Write the key part of an edge label for the range lowKey..highKey. A
+ * range of one key is written as that key alone. Keys above the alphabet
+ * maximum belong to a condition space: they are mapped back into the
+ * alphabet and followed by the list of conditions, each prefixed with "!"
+ * when its bit is not set. */
+std::ostream &GraphvizDotGen::ONCHAR( Key lowKey, Key highKey )
+{
+ if ( lowKey > keyOps->maxKey ) {
+ /* NOTE(review): the result of findCondSpace is used without a null
+ * check -- confirm it cannot fail for keys above maxKey. */
+ CondSpace *condSpace = cgd->findCondSpace( lowKey, highKey );
+
+ /* Which alphabet-sized slice of the condition space the range is in.
+ * The bits of this value select the conditions that hold. */
+ Key values = ( lowKey - condSpace->baseKey ) / keyOps->alphSize();
+
+ /* Map both ends of the range back into the plain alphabet. */
+ lowKey = keyOps->minKey +
+ (lowKey - condSpace->baseKey - keyOps->alphSize() * values.getVal());
+ highKey = keyOps->minKey +
+ (highKey - condSpace->baseKey - keyOps->alphSize() * values.getVal());
+ KEY( lowKey );
+ if ( lowKey != highKey ) {
+ out << "..";
+ KEY( highKey );
+ }
+ out << "(";
+
+ /* List the conditions, negating those whose bit is clear. */
+ for ( CondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) {
+ bool set = values & (1 << csi.pos());
+ if ( !set )
+ out << "!";
+ out << (*csi)->nameOrLoc();
+ if ( !csi.last() )
+ out << ", ";
+ }
+ out << ")";
+ }
+ else {
+ /* Output the key. Possibly a range. */
+ KEY( lowKey );
+ if ( highKey != lowKey ) {
+ out << "..";
+ KEY( highKey );
+ }
+ }
+ return out;
+}
+
+/* Write the edges leaving a state. Range entries that share a transition
+ * are merged into one edge whose label lists all of their keys. The default
+ * transition, when present, gets an edge labelled DEF. Transitions without
+ * a target point at the state's err_<id> node. Only the outRange list and
+ * the default transition are walked here. */
+void GraphvizDotGen::writeTransList( RedStateAp *state )
+{
+ /* Build the set of unique transitions out of this state. */
+ RedTransSet stTransSet;
+ for ( RedTransList::Iter tel = state->outRange; tel.lte(); tel++ ) {
+ /* If we haven't seen the transition before, then move forward
+ * emitting all the keys that lead to the same transition. */
+ if ( stTransSet.insert( tel->value ) ) {
+ /* Write out the from and to states. */
+ out << "\t" << state->id << " -> ";
+
+ if ( tel->value->targ == 0 )
+ out << "err_" << state->id;
+ else
+ out << tel->value->targ->id;
+
+ /* Begin the label. */
+ out << " [ label = \"";
+ ONCHAR( tel->lowKey, tel->highKey );
+
+ /* Walk the rest of the transition list, finding the same. */
+ for ( RedTransList::Iter mtel = tel.next(); mtel.lte(); mtel++ ) {
+ if ( mtel->value == tel->value ) {
+ out << ", ";
+ ONCHAR( mtel->lowKey, mtel->highKey );
+ }
+ }
+
+ /* Write the action and close the transition. */
+ TRANS_ACTION( state, tel->value );
+ out << "\" ];\n";
+ }
+ }
+
+ /* Write the default transition. */
+ if ( state->defTrans != 0 ) {
+ /* Write out the from and to states. */
+ out << "\t" << state->id << " -> ";
+
+ if ( state->defTrans->targ == 0 )
+ out << "err_" << state->id;
+ else
+ out << state->defTrans->targ->id;
+
+ /* Begin the label. */
+ out << " [ label = \"DEF";
+
+ /* Write the action and close the transition. */
+ TRANS_ACTION( state, state->defTrans );
+ out << "\" ];\n";
+ }
+}
+
+/* Write the whole machine as a Graphviz digraph. Node declarations come
+ * first, grouped by shape: pseudo nodes for entry points and EOF actions,
+ * error nodes, then final states. The edges follow: state transitions, the
+ * entry edges and the EOF action edges. */
+void GraphvizDotGen::writeDotFile( )
+{
+ out <<
+ "digraph " << fsmName << " {\n"
+ " rankdir=LR;\n";
+
+ /* Define the pseudo states. Transitions will be done after the states
+ * have been defined as either final or not final. */
+ out << " node [ shape = point ];\n";
+ out << " ENTRY;\n";
+
+ /* Pseudo states for entry points in the entry map. */
+ for ( EntryIdVect::Iter en = cgd->entryPointIds; en.lte(); en++ ) {
+ RedStateAp *state = cgd->allStates + *en;
+ out << " en_" << state->id << ";\n";
+ }
+
+ /* Pseudo states for final states with eof actions. */
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ if ( st->eofAction != 0 )
+ out << " eof_" << st->id << ";\n";
+ }
+
+ out << " node [ shape = circle, height = 0.2 ];\n";
+
+ /* Pseudo states for states that have a transition going to error, either
+ * the default transition or one of the range transitions. */
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ bool needsErr = false;
+ if ( st->defTrans != 0 && st->defTrans->targ == 0 )
+ needsErr = true;
+ else {
+ for ( RedTransList::Iter tel = st->outRange; tel.lte(); tel++ ) {
+ if ( tel->value->targ == 0 ) {
+ needsErr = true;
+ break;
+ }
+ }
+ }
+
+ if ( needsErr )
+ out << " err_" << st->id << " [ label=\"\"];\n";
+ }
+
+ /* Attributes common to all nodes, plus double circle for final states. */
+ out << " node [ fixedsize = true, height = 0.65, shape = doublecircle ];\n";
+
+ /* List Final states. */
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ if ( st->isFinal )
+ out << " " << st->id << ";\n";
+ }
+
+ /* List transitions. States first mentioned from here on get the plain
+ * circle shape. */
+ out << " node [ shape = circle ];\n";
+
+ /* Walk the states. */
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+ writeTransList( st );
+
+ /* Transitions into the start state. */
+ out << " ENTRY -> " << redFsm->startState->id << " [ label = \"IN";
+ out << "\" ];\n";
+
+ /* Transitions into the entry points, labelled with the entry name that
+ * sits at the same position in entryPointNames. */
+ for ( EntryIdVect::Iter en = cgd->entryPointIds; en.lte(); en++ ) {
+ RedStateAp *state = cgd->allStates + *en;
+ char *name = cgd->entryPointNames[en.pos()];
+ out << " en_" << state->id << " -> " << state->id <<
+ " [ label = \"" << name << "\" ];\n";
+ }
+
+ /* Out action transitions. */
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ if ( st->eofAction != 0 ) {
+ out << " " << st->id << " -> eof_" <<
+ st->id << " [ label = \"EOF";
+ ACTION( st->eofAction ) << "\" ];\n";
+ }
+ }
+
+ out <<
+ "}\n";
+}
diff --git a/rlcodegen/gvdotgen.h b/rlcodegen/gvdotgen.h
new file mode 100644
index 0000000..3dfcebc
--- /dev/null
+++ b/rlcodegen/gvdotgen.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _GVDOTGEN_H
+#define _GVDOTGEN_H
+
+#include <iostream>
+#include "redfsm.h"
+
+/* Forwards. */
+struct CodeGenData;
+
+/* Writes a reduced state machine to an output stream as Graphviz dot text. */
+class GraphvizDotGen
+{
+public:
+ /* NOTE(review): the constructor body is not in this header; presumably it
+ * stores each argument in the member of the same name -- confirm. */
+ GraphvizDotGen( char *fsmName, CodeGenData *cgd,
+ RedFsmAp *redFsm, std::ostream &out );
+
+ /* Print an fsm to out stream. */
+ /* writeTransList is invoked once per state from writeDotFile. */
+ void writeTransList( RedStateAp *state );
+ void writeDotFile( );
+
+private:
+ /* Writing labels and actions. */
+ /* Each helper returns an ostream reference so calls can be chained with
+ * further << insertions. */
+ std::ostream &ONCHAR( Key lowKey, Key highKey );
+ std::ostream &TRANS_ACTION( RedStateAp *fromState, RedTransAp *trans );
+ std::ostream &ACTION( RedAction *action );
+ std::ostream &KEY( Key key );
+
+ /* Name of the machine being drawn. */
+ char *fsmName;
+ /* Code generation data; writeDotFile reads entry points from it. */
+ CodeGenData *cgd;
+ /* The reduced machine whose states and transitions are drawn. */
+ RedFsmAp *redFsm;
+ /* Destination stream for the dot text. */
+ std::ostream &out;
+};
+
+
+#endif /* _GVDOTGEN_H */
diff --git a/rlcodegen/ipgotocodegen.cpp b/rlcodegen/ipgotocodegen.cpp
new file mode 100644
index 0000000..5100fdf
--- /dev/null
+++ b/rlcodegen/ipgotocodegen.cpp
@@ -0,0 +1,418 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Eric Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlcodegen.h"
+#include "ipgotocodegen.h"
+#include "redfsm.h"
+#include "gendata.h"
+#include "bstmap.h"
+
+/* Write a braced jump to the label of state gotoDest. inFinish is not
+ * consulted here. */
+void IpGotoCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish )
+{
+    ret << "{";
+    ret << CTRL_FLOW();
+    ret << "goto st" << gotoDest;
+    ret << ";}";
+}
+
+/* Write a call: push targState on the generated stack, then jump to the
+ * label of state callDest. inFinish is not consulted here. */
+void IpGotoCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish )
+{
+    /* Push the state to come back to. */
+    ret << "{" << STACK() << "[" << TOP() << "++] = " << targState << "; ";
+
+    /* Transfer control to the callee. */
+    ret << CTRL_FLOW() << "goto st" << callDest << ";}";
+}
+
+/* Write a return: pop the generated stack into the current state variable
+ * and jump to the _again label. inFinish is not consulted here. */
+void IpGotoCodeGen::RET( ostream &ret, bool inFinish )
+{
+    /* Pop the saved state. */
+    ret << "{" << CS() << " = " << STACK() << "[--" << TOP() << "]; ";
+
+    /* Re-dispatch on the restored state. */
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write a computed goto: assign the inline expression to the current state
+ * variable, then jump to the _again label. */
+void IpGotoCodeGen::GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+    ret << "{";
+    ret << CS() << " = (";
+
+    /* The target expression comes from the item's children. */
+    INLINE_LIST( ret, ilItem->children, 0, inFinish );
+
+    ret << "); ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write a computed call: push targState, assign the inline expression to the
+ * current state variable, then jump to the _again label. */
+void IpGotoCodeGen::CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish )
+{
+    /* Push the state to come back to. */
+    ret << "{" << STACK() << "[" << TOP() << "++] = " << targState << "; ";
+
+    /* Assign the computed destination. */
+    ret << CS() << " = (";
+    INLINE_LIST( ret, ilItem->children, 0, inFinish );
+    ret << "); ";
+
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write an assignment of nextDest to the current state variable. No jump is
+ * emitted. */
+void IpGotoCodeGen::NEXT( ostream &ret, int nextDest, bool inFinish )
+{
+    ret << CS() << " = ";
+    ret << nextDest << ";";
+}
+
+/* Write an assignment of the inline expression to the current state
+ * variable. No jump is emitted. */
+void IpGotoCodeGen::NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+    ret << CS();
+    ret << " = (";
+    INLINE_LIST( ret, ilItem->children, 0, inFinish );
+    ret << ");";
+}
+
+/* Write the expression for the current state as seen from action code. In
+ * this code style that is the _ps variable, which GOTO_HEADER assigns and
+ * writeOutExec declares when anyRegCurStateRef() holds. */
+void IpGotoCodeGen::CURS( ostream &ret, bool inFinish )
+{
+ ret << "(_ps)";
+}
+
+/* Write the target state as a literal number. The target is known at
+ * generation time, so targState is written directly. */
+void IpGotoCodeGen::TARGS( ostream &ret, bool inFinish, int targState )
+{
+ ret << targState;
+}
+
+/* Write a jump to the _out<targState> exit label; EXIT_STATES emits those
+ * labels. */
+void IpGotoCodeGen::BREAK( ostream &ret, int targState )
+{
+    ret << CTRL_FLOW();
+    ret << "goto _out" << targState << ";";
+}
+
+/* Emit a labelled action block for every transition that enters the given
+ * state, carries an action and needs a label. Returns true when at least one
+ * block was emitted, so the caller knows to write a line directive before
+ * going back to generated output. */
+bool IpGotoCodeGen::IN_TRANS_ACTIONS( RedStateAp *state )
+{
+    bool wroteAction = false;
+
+    for ( int idx = 0; idx < state->numInTrans; idx++ ) {
+        RedTransAp *inTrans = state->inTrans[idx];
+
+        /* Transitions without an action or without a label get no block. */
+        if ( inTrans->action == 0 || !inTrans->labelNeeded )
+            continue;
+
+        wroteAction = true;
+
+        /* The label that TRANS_GOTO jumps to. */
+        out << "tr" << inTrans->id << ":\n";
+
+        /* An action with a next statement may or may not assign the current
+         * state, so preload it with the transition's own target. */
+        if ( inTrans->action->anyNextStmt() )
+            out << " " << CS() << " = " << inTrans->targ->id << ";\n";
+
+        /* The action bodies, in list order. */
+        for ( ActionTable::Iter act = inTrans->action->key; act.lte(); act++ )
+            ACTION( out, act->value, inTrans->targ->id, false );
+
+        /* With a next statement the state must be re-dispatched; without
+         * one the target is fixed and can be jumped to directly. */
+        if ( inTrans->action->anyNextStmt() )
+            out << "\tgoto _again;\n";
+        else
+            out << "\tgoto st" << inTrans->targ->id << ";\n";
+    }
+
+    return wroteAction;
+}
+
+/* Called from GotoCodeGen::STATE_GOTOS just before writing the gotos for each
+ * state. Emits, in this order: the action blocks of incoming transitions, the
+ * state label, the to-state actions, the buffer advance, the switch case for
+ * the state, the from-state actions, and the store of the state id into _ps. */
+void IpGotoCodeGen::GOTO_HEADER( RedStateAp *state )
+{
+ /* True as soon as any user action code has been written. */
+ bool anyWritten = IN_TRANS_ACTIONS( state );
+
+ if ( state->labelNeeded )
+ out << "st" << state->id << ":\n";
+
+ if ( state->toStateAction != 0 ) {
+ /* Remember that we wrote an action. Write every action in the list. */
+ anyWritten = true;
+ for ( ActionTable::Iter item = state->toStateAction->key; item.lte(); item++ )
+ ACTION( out, item->value, state->id, false );
+ }
+
+ /* Advance and test buffer pos. */
+ /* Emitted only for labelled states, so it sits between the stN label and
+ * the case label. */
+ if ( state->labelNeeded ) {
+ if ( cgd->hasEnd ) {
+ /* Jump to this state's exit label when the end is reached. */
+ out <<
+ " if ( ++" << P() << " == " << PE() << " )\n"
+ " goto _out" << state->id << ";\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n";
+ }
+ }
+
+ /* Give the state a switch case. */
+ out << "case " << state->id << ":\n";
+
+ if ( state->fromStateAction != 0 ) {
+ /* Remember that we wrote an action. Write every action in the list. */
+ anyWritten = true;
+ for ( ActionTable::Iter item = state->fromStateAction->key; item.lte(); item++ )
+ ACTION( out, item->value, state->id, false );
+ }
+
+ /* Action code was written, so emit a line directive for the output. */
+ if ( anyWritten )
+ genLineDirective( out );
+
+ /* Record the prev state if necessary. */
+ if ( state->anyRegCurStateRef() )
+ out << " _ps = " << state->id << ";\n";
+}
+
+/* Emit the code for the error state. It gets no case label because the
+ * generated switch never dispatches on it, so the pieces GOTO_HEADER would
+ * normally write are produced here. */
+void IpGotoCodeGen::STATE_GOTO_ERROR()
+{
+    RedStateAp *errState = redFsm->errState;
+
+    /* Action blocks of transitions into the error state; a line directive
+     * follows if any were written. */
+    if ( IN_TRANS_ACTIONS( errState ) )
+        genLineDirective( out );
+
+    if ( errState->labelNeeded )
+        out << "st" << errState->id << ":\n";
+
+    /* Reaching the error state always leaves the machine. */
+    out << " goto _out" << errState->id << ";\n";
+}
+
+
+/* Emit the jump for one transition at the given indentation level. A
+ * transition without an action jumps directly to its target state; one with
+ * an action jumps to its tr<id> label. */
+std::ostream &IpGotoCodeGen::TRANS_GOTO( RedTransAp *trans, int level )
+{
+    out << TABS(level);
+
+    if ( trans->action == 0 )
+        out << "goto st" << trans->targ->id << ";";
+    else
+        out << "goto tr" << trans->id << ";";
+
+    return out;
+}
+
+/* Emit one _out<id> label for every state flagged outNeeded. Each label
+ * stores the state id in the current state variable and jumps to the shared
+ * _out label; outLabelUsed is set so that label gets written. */
+std::ostream &IpGotoCodeGen::EXIT_STATES()
+{
+    for ( RedStateList::Iter state = redFsm->stateList; state.lte(); state++ ) {
+        if ( !state->outNeeded )
+            continue;
+
+        outLabelUsed = true;
+        out << " _out" << state->id << ": ";
+        out << CS() << " = " << state->id << "; goto _out; \n";
+    }
+    return out;
+}
+
+/* Emit the body of the _again switch: one case per state that jumps to the
+ * state's label. */
+std::ostream &IpGotoCodeGen::AGAIN_CASES()
+{
+    RedStateList::Iter state = redFsm->stateList;
+    while ( state.lte() ) {
+        out << " case " << state->id << ": ";
+        out << "goto st" << state->id << ";\n";
+        state++;
+    }
+    return out;
+}
+
+/* Emit the cases of the EOF switch. States sharing the same EOF action list
+ * are grouped under one run of case labels followed by that list's action
+ * code. */
+std::ostream &IpGotoCodeGen::FINISH_CASES()
+{
+    /* First pass: record on each EOF action list the ids of the states that
+     * use it, creating the id set on first use. */
+    for ( RedStateList::Iter state = redFsm->stateList; state.lte(); state++ ) {
+        if ( state->eofAction == 0 )
+            continue;
+
+        if ( state->eofAction->eofRefs == 0 )
+            state->eofAction->eofRefs = new IntSet;
+        state->eofAction->eofRefs->insert( state->id );
+    }
+
+    /* Second pass: one group of cases per referenced action list. */
+    bool wroteAction = false;
+    for ( ActionTableMap::Iter actList = redFsm->actionMap; actList.lte(); actList++ ) {
+        if ( actList->eofRefs == 0 )
+            continue;
+
+        for ( IntSet::Iter stateId = *actList->eofRefs; stateId.lte(); stateId++ )
+            out << " case " << *stateId << ": \n";
+
+        /* Action code follows, so a line directive is needed afterwards. */
+        wroteAction = true;
+
+        for ( ActionTable::Iter act = actList->key; act.lte(); act++ )
+            ACTION( out, act->value, STATE_ERR_STATE, true );
+        out << "\tbreak;\n";
+    }
+
+    if ( wroteAction )
+        genLineDirective( out );
+    return out;
+}
+
+/* Walk an inline item tree and flag the target state of every goto and call
+ * item as needing a label. Recurses into child lists. */
+void IpGotoCodeGen::setLabelsNeeded( InlineList *inlineList )
+{
+    for ( InlineList::Iter node = *inlineList; node.lte(); node++ ) {
+        const bool jumpsToState =
+                node->type == InlineItem::Goto ||
+                node->type == InlineItem::Call;
+
+        /* The jump will be emitted as a goto to the state's label. */
+        if ( jumpsToState )
+            node->targState->labelNeeded = true;
+
+        if ( node->children != 0 )
+            setLabelsNeeded( node->children );
+    }
+}
+
+/* Decide, for every state, whether it needs an st<id> label (labelNeeded)
+ * and whether it needs an _out<id> exit label (outNeeded). */
+void IpGotoCodeGen::setLabelsNeeded()
+{
+    /* When the _again label is used, the _again switch is generated and it
+     * refers to the label of every state. Otherwise start from "no labels"
+     * and turn on only those that are referenced. */
+    const bool everyLabel = useAgainLabel();
+    for ( RedStateList::Iter state = redFsm->stateList; state.lte(); state++ )
+        state->labelNeeded = everyLabel;
+
+    if ( !everyLabel ) {
+        if ( redFsm->errState != 0 && anyLmSwitchError() )
+            redFsm->errState->labelNeeded = true;
+
+        for ( TransApSet::Iter edge = redFsm->transSet; edge.lte(); edge++ ) {
+            /* A transition whose action has no next statement jumps straight
+             * to its target, so the target needs a label. */
+            if ( edge->action == 0 || !edge->action->anyNextStmt() )
+                edge->targ->labelNeeded = true;
+
+            /* Gotos and calls inside action code invoked on characters (not
+             * from out action code) also jump to state labels; walk each
+             * action's inline tree to find them. */
+            if ( edge->action != 0 ) {
+                for ( ActionTable::Iter act = edge->action->key; act.lte(); act++ )
+                    setLabelsNeeded( act->value->inlineList );
+            }
+        }
+    }
+
+    if ( cgd->hasEnd ) {
+        /* Every labelled state gets an exit label as well. */
+        for ( RedStateList::Iter state = redFsm->stateList; state.lte(); state++ )
+            state->outNeeded = state->labelNeeded;
+    }
+    else {
+        if ( redFsm->errState != 0 )
+            redFsm->errState->outNeeded = true;
+
+        /* A state entered by a transition whose action contains a break
+         * needs an exit label. */
+        for ( TransApSet::Iter edge = redFsm->transSet; edge.lte(); edge++ ) {
+            if ( edge->action != 0 && edge->action->anyBreakStmt() )
+                edge->targ->outNeeded = true;
+        }
+    }
+}
+
+/* Write the constant definitions for the machine: always the start state,
+ * plus the first-final and error state constants when cgd requests them. */
+void IpGotoCodeGen::writeOutData()
+{
+    out << "static const int " << START() << " = " << START_STATE_ID() << ";\n";
+    out << "\n";
+
+    if ( cgd->writeFirstFinal ) {
+        out << "static const int " << FIRST_FINAL() << " = " << FIRST_FINAL_STATE() << ";\n";
+        out << "\n";
+    }
+
+    if ( cgd->writeErr ) {
+        out << "static const int " << ERROR() << " = " << ERROR_STATE() << ";\n";
+        out << "\n";
+    }
+}
+
+/* Write the execution block of the machine: local declarations, the optional
+ * _again re-dispatch switch, the main switch holding the per-state code, the
+ * per-state exit labels and, when referenced, the shared _out label. */
+void IpGotoCodeGen::writeOutExec()
+{
+ /* Becomes true once something emits a jump to the shared _out label. */
+ outLabelUsed = false;
+
+ out << " {\n";
+
+ /* _ps is the variable that CURS() refers to. */
+ if ( anyRegCurStateRef() )
+ out << " int _ps = 0;\n";
+
+ if ( anyConditions() )
+ out << " " << WIDE_ALPH_TYPE() << " _widec;\n";
+
+ /* Leave immediately when the input is empty. */
+ if ( cgd->hasEnd ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+
+ if ( useAgainLabel() ) {
+ /* The first entry skips the _again switch and the advance after it. */
+ out <<
+ " goto _resume;\n"
+ "\n"
+ "_again:\n"
+ " switch ( " << CS() << " ) {\n";
+ AGAIN_CASES() <<
+ " default: break;\n"
+ " }\n"
+ "\n";
+
+ /* Reached when the _again switch takes its default case. */
+ if ( cgd->hasEnd ) {
+ outLabelUsed = true;
+ out <<
+ " if ( ++" << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n";
+ }
+
+ out << "_resume:\n";
+ }
+
+ out <<
+ " switch ( " << CS() << " )\n {\n";
+ STATE_GOTOS();
+ SWITCH_DEFAULT() <<
+ " }\n";
+ /* EXIT_STATES may set outLabelUsed, so it must run before the check below. */
+ EXIT_STATES() <<
+ "\n";
+
+ if ( outLabelUsed )
+ out << " _out: {}\n";
+
+ out <<
+ " }\n";
+}
+
+/* Write the EOF action block: a switch on the current state whose cases come
+ * from FINISH_CASES. Nothing is written when the machine has no EOF actions. */
+void IpGotoCodeGen::writeOutEOF()
+{
+    if ( !anyEofActions() )
+        return;
+
+    out << " {\n";
+    out << " switch ( " << CS() << " ) {\n";
+    FINISH_CASES();
+    SWITCH_DEFAULT() << " }\n";
+    out << " }\n";
+    out << "\n";
+}
diff --git a/rlcodegen/ipgotocodegen.h b/rlcodegen/ipgotocodegen.h
new file mode 100644
index 0000000..25b64ee
--- /dev/null
+++ b/rlcodegen/ipgotocodegen.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Eric Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _IPGCODEGEN_H
+#define _IPGCODEGEN_H
+
+#include <iostream>
+#include "gotocodegen.h"
+
+/* Forwards. */
+struct CodeGenData;
+
+/*
+ * class IpGotoCodeGen
+ *
+ * Goto-driven code generator in which each state has its own st<id> label
+ * and transitions jump directly between labels. Derives from GotoCodeGen.
+ */
+class IpGotoCodeGen : public GotoCodeGen
+{
+public:
+ /* Emitters for pieces of the generated machine; each returns the output
+ * stream so further text can be chained onto the call. */
+ std::ostream &EXIT_STATES();
+ std::ostream &TRANS_GOTO( RedTransAp *trans, int level );
+ std::ostream &FINISH_CASES();
+ std::ostream &AGAIN_CASES();
+
+ /* Emitters for the control flow statements that can appear in action
+ * code. Each writes its text to ret. */
+ void GOTO( ostream &ret, int gotoDest, bool inFinish );
+ void CALL( ostream &ret, int callDest, int targState, bool inFinish );
+ void NEXT( ostream &ret, int nextDest, bool inFinish );
+ void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+ void NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+ void CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish );
+ void RET( ostream &ret, bool inFinish );
+ void CURS( ostream &ret, bool inFinish );
+ void TARGS( ostream &ret, bool inFinish, int targState );
+ void BREAK( ostream &ret, int targState );
+
+ /* Top-level writers for the data, EOF and execution sections. */
+ virtual void writeOutData();
+ virtual void writeOutEOF();
+ virtual void writeOutExec();
+
+protected:
+ /* True when the generated code needs the _again label and its switch. */
+ bool useAgainLabel()
+ { return anyRegActionRets() || anyRegActionByValControl() || anyRegNextStmt(); }
+
+ /* Called from GotoCodeGen::STATE_GOTOS just before writing the gotos for
+ * each state. */
+ bool IN_TRANS_ACTIONS( RedStateAp *state );
+ void GOTO_HEADER( RedStateAp *state );
+ void STATE_GOTO_ERROR();
+
+ /* Set up labelNeeded flag for each state. */
+ void setLabelsNeeded( InlineList *inlineList );
+ void setLabelsNeeded();
+};
+
+
+/*
+ * struct CIpGotoCodeGen
+ *
+ * IpGotoCodeGen combined with CCodeGen. Adds no members of its own.
+ */
+struct CIpGotoCodeGen
+ : public IpGotoCodeGen, public CCodeGen
+{
+};
+
+/*
+ * struct DIpGotoCodeGen
+ *
+ * IpGotoCodeGen combined with DCodeGen. Adds no members of its own.
+ */
+struct DIpGotoCodeGen
+ : public IpGotoCodeGen, public DCodeGen
+{
+};
+
+
+#endif /* _IPGCODEGEN_H */
diff --git a/rlcodegen/javacodegen.cpp b/rlcodegen/javacodegen.cpp
new file mode 100644
index 0000000..f902620
--- /dev/null
+++ b/rlcodegen/javacodegen.cpp
@@ -0,0 +1,307 @@
+/*
+ * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "javacodegen.h"
+#include "rlcodegen.h"
+#include "tabcodegen.h"
+#include "redfsm.h"
+#include "gendata.h"
+
+/* Write a goto for Java: assign gotoDest to the current state variable and
+ * break out of the block labelled _again. */
+void JavaTabCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish )
+{
+    ret << "{" << CS() << " = " << gotoDest << "; ";
+    ret << CTRL_FLOW() << "break _again;}";
+}
+
+/* Write a computed goto for Java: assign the inline expression to the
+ * current state variable and break out of the block labelled _again. */
+void JavaTabCodeGen::GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+    ret << "{";
+    ret << CS() << " = (";
+    INLINE_LIST( ret, ilItem->children, 0, inFinish );
+    ret << "); ";
+    ret << CTRL_FLOW() << "break _again;}";
+}
+
+/* Write a call for Java: push the current state variable on the generated
+ * stack, assign callDest to it, and break out of the block labelled _again. */
+void JavaTabCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish )
+{
+    /* Push the current state. */
+    ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; ";
+
+    /* Enter the callee. */
+    ret << CS() << " = " << callDest << "; ";
+    ret << CTRL_FLOW() << "break _again;}";
+}
+
+/* Write a computed call for Java: push the current state variable, assign
+ * the inline expression to it, and break out of the block labelled _again. */
+void JavaTabCodeGen::CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish )
+{
+    /* Push the current state. */
+    ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; ";
+
+    /* Assign the computed destination. */
+    ret << CS() << " = (";
+    INLINE_LIST( ret, ilItem->children, targState, inFinish );
+    ret << "); ";
+
+    ret << CTRL_FLOW() << "break _again;}";
+}
+
+/* Write a return for Java: pop the generated stack into the current state
+ * variable and break out of the block labelled _again. */
+void JavaTabCodeGen::RET( ostream &ret, bool inFinish )
+{
+    ret << "{" << CS() << " = " << STACK() << "[--" << TOP() << "]; ";
+    ret << CTRL_FLOW() << "break _again;}";
+}
+
+/* Write a break for Java: leave the loop labelled _resume. targState is not
+ * used. */
+void JavaTabCodeGen::BREAK( ostream &ret, int targState )
+{
+    ret << CTRL_FLOW();
+    ret << "break _resume;";
+}
+
+/* Emit the Java code that translates the current key into a wide key
+ * (_widec) when the current state has conditions.
+ *
+ * The generated code binary-searches the state's condition key ranges. The
+ * search steps by two entries, so each range is a (low, high) pair stored at
+ * [_mid] and [_mid+1]. On a hit it switches on the condition space, rebases
+ * the key onto that space's base key, and adds an offset for every condition
+ * that holds.
+ *
+ * Fixes relative to the previous version:
+ *  - the upper bound is compared against CK[_mid+1] (the high key); it used
+ *    to be compared against CK[_mid], the low key, so keys inside a range
+ *    but above its low key were treated as misses;
+ *  - two statements emitted their semicolon after the newline;
+ *  - the inner loop no longer reuses the outer loop's iterator name. */
+void JavaTabCodeGen::COND_TRANSLATE()
+{
+    out <<
+        " _widec = " << GET_KEY() << ";\n"
+        " _keys = " << CO() << "[" << CS() << "]*2;\n"
+        " _klen = " << CL() << "[" << CS() << "];\n"
+        " if ( _klen > 0 ) {\n"
+        " int _lower = _keys;\n"
+        " int _mid;\n"
+        " int _upper = _keys + (_klen<<1) - 2;\n"
+        " while (true) {\n"
+        " if ( _upper < _lower )\n"
+        " break;\n"
+        "\n"
+        " _mid = _lower + (((_upper-_lower) >> 1) & ~1);\n"
+        " if ( " << GET_WIDE_KEY() << " < " << CK() << "[_mid] )\n"
+        " _upper = _mid - 2;\n"
+        " else if ( " << GET_WIDE_KEY() << " > " << CK() << "[_mid+1] )\n"
+        " _lower = _mid + 2;\n"
+        " else {\n"
+        " switch ( " << C() << "[" << CO() << "[" << CS() << "]"
+        " + ((_mid - _keys)>>1)] ) {\n"
+        ;
+
+    /* One case per condition space. */
+    for ( CondSpaceList::Iter csi = cgd->condSpaceList; csi.lte(); csi++ ) {
+        CondSpace *condSpace = csi;
+        out << " case " << condSpace->condSpaceId << ": {\n";
+
+        /* Rebase the key onto this condition space. */
+        out << TABS(2) << "_widec = " << KEY(condSpace->baseKey) <<
+                " + (" << GET_KEY() << " - " << KEY(keyOps->minKey) << ");\n";
+
+        /* Each condition that holds adds an offset of one alphabet size
+         * shifted left by the condition's position in the set. */
+        for ( CondSet::Iter cond = condSpace->condSet; cond.lte(); cond++ ) {
+            out << TABS(2) << "if ( ";
+            CONDITION( out, *cond );
+            Size condValOffset = ((1 << cond.pos()) * keyOps->alphSize());
+            out << " ) _widec += " << condValOffset << ";\n";
+        }
+
+        out <<
+            " break;\n"
+            " }\n";
+    }
+
+    /* Close the switch, leave the search loop, and close the blocks. */
+    out <<
+        " }\n"
+        " break;\n"
+        " }\n"
+        " }\n"
+        " }\n"
+        "\n";
+}
+
+
+/* Emit the Java code that finds the transition for the current key. The
+ * generated code lives in a do { } while (false) block labelled _match and
+ * leaves _trans holding the transition index: a binary search over the
+ * state's single keys, then one over its key ranges, then the fall-through
+ * position when neither search hits. */
+void JavaTabCodeGen::LOCATE_TRANS()
+{
+    /* Open the block and load the state's table offsets. */
+    out <<
+        " _match: do {\n"
+        " _keys = " << KO() << "[" << CS() << "]" << ";\n"
+        " _trans = " << IO() << "[" << CS() << "];\n";
+
+    /* Search over the single keys; entries are one key wide. */
+    out <<
+        " _klen = " << SL() << "[" << CS() << "];\n"
+        " if ( _klen > 0 ) {\n"
+        " int _lower = _keys;\n"
+        " int _mid;\n"
+        " int _upper = _keys + _klen - 1;\n"
+        " while (true) {\n"
+        " if ( _upper < _lower )\n"
+        " break;\n"
+        "\n"
+        " _mid = _lower + ((_upper-_lower) >> 1);\n"
+        " if ( " << GET_WIDE_KEY() << " < " << K() << "[_mid] )\n"
+        " _upper = _mid - 1;\n"
+        " else if ( " << GET_WIDE_KEY() << " > " << K() << "[_mid] )\n"
+        " _lower = _mid + 1;\n"
+        " else {\n"
+        " _trans += (_mid - _keys);\n"
+        " break _match;\n"
+        " }\n"
+        " }\n"
+        " _keys += _klen;\n"
+        " _trans += _klen;\n"
+        " }\n"
+        "\n";
+
+    /* Search over the key ranges; entries are (low, high) pairs, so the
+     * search steps by two and halves the offset on a hit. */
+    out <<
+        " _klen = " << RL() << "[" << CS() << "];\n"
+        " if ( _klen > 0 ) {\n"
+        " int _lower = _keys;\n"
+        " int _mid;\n"
+        " int _upper = _keys + (_klen<<1) - 2;\n"
+        " while (true) {\n"
+        " if ( _upper < _lower )\n"
+        " break;\n"
+        "\n"
+        " _mid = _lower + (((_upper-_lower) >> 1) & ~1);\n"
+        " if ( " << GET_WIDE_KEY() << " < " << K() << "[_mid] )\n"
+        " _upper = _mid - 2;\n"
+        " else if ( " << GET_WIDE_KEY() << " > " << K() << "[_mid+1] )\n"
+        " _lower = _mid + 2;\n"
+        " else {\n"
+        " _trans += ((_mid - _keys)>>1);\n"
+        " break _match;\n"
+        " }\n"
+        " }\n"
+        " _trans += _klen;\n"
+        " }\n";
+
+    /* Close the labelled block. */
+    out <<
+        " } while (false);\n"
+        "\n";
+}
+
+/* Write the execution block of the table-driven Java machine. Java has no
+ * goto, so the generated code uses two labelled constructs instead: an outer
+ * "_resume: while ( true )" loop that runs once per input element, and an
+ * inner "_again: do { } while (false)" block that the control flow statements
+ * leave with "break _again". */
+void JavaTabCodeGen::writeOutExec()
+{
+ /* Local declarations; which ones appear depends on the features the
+ * machine uses. */
+ out <<
+ " {\n"
+ " int _klen";
+
+ if ( anyRegCurStateRef() )
+ out << ", _ps";
+
+ out <<
+ ";\n"
+ " int _trans;\n";
+
+ if ( anyConditions() )
+ out << " int _widec;\n";
+
+ if ( anyToStateActions() || anyRegActions() || anyFromStateActions() ) {
+ out <<
+ " int _acts;\n"
+ " int _nacts;\n";
+ }
+
+ out <<
+ " int _keys;\n"
+ "\n";
+
+ /* Guard against empty input; closed near the end of this function. */
+ if ( cgd->hasEnd )
+ out << " if ( " << P() << " != " << PE() << " ) {\n";
+
+ out << " _resume: while ( true ) {\n";
+
+ out << " _again: do {\n";
+
+ /* Stop processing once the machine is in the error state. */
+ if ( redFsm->errState != 0 ) {
+ out <<
+ " if ( " << CS() << " == " << redFsm->errState->id << " )\n"
+ " break _resume;\n";
+ }
+
+ /* Run the from-state actions of the current state. */
+ if ( anyFromStateActions() ) {
+ out <<
+ " _acts = " << FSA() << "[" << CS() << "]" << ";\n"
+ " _nacts = " << CAST("int") << " " << A() << "[_acts++];\n"
+ " while ( _nacts-- > 0 ) {\n"
+ " switch ( " << A() << "[_acts++] ) {\n";
+ FROM_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+
+ if ( anyConditions() )
+ COND_TRANSLATE();
+
+ LOCATE_TRANS();
+
+ /* Save the state being left before it is overwritten below. */
+ if ( anyRegCurStateRef() )
+ out << " _ps = " << CS() << ";\n";
+
+ /* Map the located position through the index table when one is used. */
+ if ( useIndicies )
+ out << " _trans = " << I() << "[_trans];\n";
+
+ /* Move to the transition's target state. */
+ out <<
+ " " << CS() << " = " << TT() << "[_trans];\n"
+ "\n";
+
+ /* Run the transition's actions; an action offset of zero skips them. */
+ if ( anyRegActions() ) {
+ out <<
+ " if ( " << TA() << "[_trans] == 0 )\n"
+ " break _again;\n"
+ "\n"
+ " _acts = " << TA() << "[_trans]" << ";\n"
+ " _nacts = " << CAST("int") << " " << A() << "[_acts++];\n"
+ " while ( _nacts-- > 0 )\n {\n"
+ " switch ( " << A() << "[_acts++] )\n"
+ " {\n";
+ ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+
+ /* Again loop, functions as again label. */
+ out << " } while (false);\n";
+
+ /* Run the to-state actions of the state just entered. */
+ if ( anyToStateActions() ) {
+ out <<
+ " _acts = " << TSA() << "[" << CS() << "]" << ";\n"
+ " _nacts = " << CAST("int") << " " << A() << "[_acts++];\n"
+ " while ( _nacts-- > 0 ) {\n"
+ " switch ( " << A() << "[_acts++] ) {\n";
+ TO_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+
+ /* Advance the input position, leaving the loop at the end of input. */
+ if ( cgd->hasEnd ) {
+ out <<
+ " if ( ++" << P() << " == " << PE() << " )\n"
+ " break _resume;\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n";
+ }
+
+ /* Close the resume loop. */
+ out << " }\n";
+
+ /* The if guarding on empty string. */
+ if ( cgd->hasEnd )
+ out << " }\n";
+
+ /* The execute block. */
+ out << " }\n";
+}
+
+/* Write the EOF action code for Java: look up the current state's EOF action
+ * list and run each action in it through a switch. Nothing is written when
+ * the machine has no EOF actions. */
+void JavaTabCodeGen::writeOutEOF()
+{
+    if ( !anyEofActions() )
+        return;
+
+    /* Fetch the action list and open the dispatch loop. */
+    out << " int _acts = " << EA() << "[" << CS() << "]" << ";\n";
+    out << " int _nacts = " << CAST("int") << " " << A() << "[_acts++];\n";
+    out << " while ( _nacts-- > 0 ) {\n";
+    out << " switch ( " << A() << "[_acts++] ) {\n";
+
+    EOF_ACTION_SWITCH();
+
+    /* Close the switch and the loop. */
+    SWITCH_DEFAULT() << " }\n";
+    out << " }\n";
+    out << "\n";
+}
+
diff --git a/rlcodegen/javacodegen.h b/rlcodegen/javacodegen.h
new file mode 100644
index 0000000..878f647
--- /dev/null
+++ b/rlcodegen/javacodegen.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _JAVACODEGEN_H
+#define _JAVACODEGEN_H
+
+#include "tabcodegen.h"
+
+/*
+ * JavaTabCodeGen
+ *
+ * Table-driven code generator for Java. Combines TabCodeGen with JavaCodeGen
+ * and declares its own versions of the pieces listed below.
+ */
+struct JavaTabCodeGen
+ : public TabCodeGen, public JavaCodeGen
+{
+ /* Control flow statements available to action code. */
+ void BREAK( ostream &ret, int targState );
+ void GOTO( ostream &ret, int gotoDest, bool inFinish );
+ void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+ void CALL( ostream &ret, int callDest, int targState, bool inFinish );
+ void CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish );
+ void RET( ostream &ret, bool inFinish );
+
+ /* Key translation and transition lookup, written with table indices. */
+ void COND_TRANSLATE();
+ void LOCATE_TRANS();
+
+ /* Top-level writers for the execution and EOF sections. */
+ virtual void writeOutExec();
+ virtual void writeOutEOF();
+};
+
+
+#endif
diff --git a/rlcodegen/main.cpp b/rlcodegen/main.cpp
new file mode 100644
index 0000000..f9a0598
--- /dev/null
+++ b/rlcodegen/main.cpp
@@ -0,0 +1,441 @@
+/*
+ * Copyright 2001-2005 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <iostream>
+#include <fstream>
+#include <unistd.h>
+
+#include "rlcodegen.h"
+#include "rlcodegen.h"
+#include "xmlparse.h"
+#include "pcheck.h"
+#include "vector.h"
+#include "version.h"
+
+#include "common.cpp"
+
+using std::istream;
+using std::ifstream;
+using std::ostream;
+using std::ios;
+using std::cin;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+/* Target language and output style. */
+/* Defaults: generate code, table-driven style. The -V, -T, -F, -G and -P
+ * options listed in usage() select the alternatives. */
+OutputFormat outputFormat = OutCode;
+CodeStyleEnum codeStyle = GenTables;
+
+/* Io globals. */
+/* outStream and outFilter are set up by openOutput(); outputFileName comes
+ * from -o or is derived from the input file name there. */
+istream *inStream = 0;
+ostream *outStream = 0;
+output_filter *outFilter = 0;
+char *outputFileName = 0;
+
+/* Graphviz dot file generation. */
+bool graphvizDone = false;
+
+/* File name printed in front of error and warning messages. */
+char *gblFileName = "<unknown>";
+
+/* NOTE(review): presumably the N of the -P<N> option -- confirm in main(). */
+int numSplitPartitions = 0;
+
+/* Set by the -p option: print printable characters in Graphviz output. */
+bool printPrintables = false;
+
+/* Print the command line summary to standard output. */
+void usage()
+{
+    static const char *const usageText =
+"usage: rlcodegen [options] file\n"
+"general:\n"
+" -h, -H, -?, --help Print this usage and exit\n"
+" -v, --version Print version information and exit\n"
+" -o <file> Write output to <file>\n"
+"output:\n"
+" -V Generate a Graphviz dotfile instead of code\n"
+" -p Print printable characters in Graphviz output\n"
+"generated code style:\n"
+" -T0 Table driven FSM (default)\n"
+" -T1 Faster table driven FSM\n"
+" -F0 Flat table driven FSM\n"
+" -F1 Faster flat table-driven FSM\n"
+" -G0 Goto-driven FSM\n"
+" -G1 Faster goto-driven FSM\n"
+" -G2 Really fast goto-driven FSM\n"
+" -P<N> N-Way Split really fast goto-driven FSM\n"
+    ;
+
+    cout << usageText;
+}
+
+/* Print the program name, version, publication date and copyright notice to
+ * standard output. */
+void version()
+{
+    cout << "Ragel Code Generator version " VERSION << " " PUBDATE << endl;
+    cout << "Copyright (c) 2001-2006 by Adrian Thurston" << endl;
+}
+
+/* Find the extension of a file name.
+ *
+ * Scans backwards from the end of stemFile for the last '.'. Returns a
+ * pointer into stemFile at that dot, or null when there is no extension:
+ * the name is empty, a '/' is met before any dot, or the scan reaches the
+ * first character (so a name whose only dot is its first character has no
+ * extension).
+ *
+ * The empty string is handled explicitly. Previously the scan started at
+ * stemFile - 1 and read memory in front of the buffer. */
+char *findFileExtension( char *stemFile )
+{
+    /* Nothing to scan; also keeps the "last character" pointer in bounds. */
+    if ( *stemFile == 0 )
+        return 0;
+
+    char *ppos = stemFile + strlen(stemFile) - 1;
+
+    /* Walk back until a dot or the front of the string is reached. */
+    while ( ppos != stemFile && *ppos != '.' ) {
+        /* A path separator before any dot means there is no extension. */
+        if ( *ppos == '/' )
+            return 0;
+        ppos--;
+    }
+
+    /* Stopping on the first character means no extension was found. */
+    return ppos == stemFile ? 0 : ppos;
+}
+
+/* Build a file name from stemFile by replacing its extension, if it has one,
+ * with suffix; without an extension the suffix is appended. The result is
+ * allocated with new[] and owned by the caller. stemFile must not be empty. */
+char *fileNameFromStem( char *stemFile, char *suffix )
+{
+    int stemLen = strlen( stemFile );
+    assert( stemLen > 0 );
+
+    /* Keep only the part in front of the extension. */
+    char *ext = findFileExtension( stemFile );
+    if ( ext != 0 )
+        stemLen = ext - stemFile;
+
+    /* Room for the stem, the suffix and the terminator. */
+    char *result = new char[ stemLen + strlen( suffix ) + 1 ];
+
+    /* stemLen never exceeds the length of stemFile, so this copies exactly
+     * stemLen characters; the suffix copy then supplies the terminator. */
+    memcpy( result, stemFile, stemLen );
+    strcpy( result + stemLen, suffix );
+
+    return result;
+}
+
+/* Number of errors reported so far through the error functions. */
+int gblErrorCount = 0;
+
+/* Count a program error, print the program name prefix on the error stream,
+ * and return that stream so the caller can append the message. */
+ostream &error()
+{
+    ++gblErrorCount;
+    return cerr << PROGNAME ": ";
+}
+
+/* Print the opening to an error in the input, then return the error ostream. */
+//ostream &error( const YYLTYPE &loc )
+//{
+// gblErrorCount += 1;
+// cerr << gblFileName << ":" << loc.first_line << ":" << loc.first_column << ": ";
+// return cerr;
+//}
+
+/* Print the opening to an error in the input, then return the error ostream. */
+//ostream &error( const InputLoc &loc )
+//{
+// gblErrorCount += 1;
+// cerr << gblFileName << ":" << loc.line << ":" << loc.col << ": ";
+// return cerr;
+//}
+
+/* Count an error in the input, print the "file:line:column: " prefix on the
+ * error stream, and return that stream so the caller can append the message.
+ * The prefix used to have two colons after the file name; it now matches the
+ * format of warning( int, int ). */
+ostream &error( int first_line, int first_column )
+{
+    gblErrorCount += 1;
+    cerr << gblFileName << ":" << first_line << ":" << first_column << ": ";
+    return cerr;
+}
+
+/* Print the "file: warning: " prefix on the error stream and return that
+ * stream so the caller can append the message. Warnings do not touch the
+ * error count. The prefix used to have two colons after the file name. */
+ostream &warning( )
+{
+    cerr << gblFileName << ": warning: ";
+    return cerr;
+}
+
+/* Print the "file:line:column: warning: " prefix for a location in the input
+ * and return the error stream so the caller can append the message. The
+ * separator between the file name and the line number was missing, which
+ * ran the two together; the format now matches warning( int, int ). */
+ostream &warning( const InputLoc &loc )
+{
+    cerr << gblFileName << ":" << loc.line << ":" << loc.col << ": warning: ";
+    return cerr;
+}
+
+/* Print the "file:line:column: warning: " prefix on the error stream and
+ * return that stream so the caller can append the message. */
+std::ostream &warning( int first_line, int first_column )
+{
+    cerr << gblFileName;
+    cerr << ":" << first_line;
+    cerr << ":" << first_column;
+    return cerr << ": warning: ";
+}
+
+//ostream &xml_error( const YYLTYPE &loc )
+//{
+// gblErrorCount += 1;
+// cerr << "<xml-input>:" << loc.first_line << ":" << loc.first_column << ": ";
+// return cerr;
+//}
+
+/* Count an error in the XML input, print the "<xml-input>:line:column: "
+ * prefix on the error stream, and return that stream so the caller can
+ * append the message. */
+ostream &xml_error( const InputLoc &loc )
+{
+    ++gblErrorCount;
+    cerr << "<xml-input>:";
+    cerr << loc.line << ":" << loc.col;
+    return cerr << ": ";
+}
+
+/* Counts newlines before sending sync. */
+/* NOTE(review): the line count is incremented on every sync, whether or not
+ * a newline was written. Presumably this accounts for the newline written by
+ * std::endl, which may reach the buffer without going through xsputn --
+ * confirm; a plain flush would overcount. */
+int output_filter::sync( )
+{
+ line += 1;
+ return std::filebuf::sync();
+}
+
+/* Count the newlines in the n characters at s, add them to the line
+ * counter, then pass the data on to the file buffer. Returns what the file
+ * buffer reports as written.
+ *
+ * The index has the same type as n. It used to be an int compared against a
+ * std::streamsize, which could overflow on very large writes. */
+std::streamsize output_filter::xsputn( const char *s, std::streamsize n )
+{
+    for ( std::streamsize i = 0; i < n; i++ ) {
+        if ( s[i] == '\n' )
+            line += 1;
+    }
+    return std::filebuf::xsputn( s, n );
+}
+
+/* Write path to out with every backslash doubled, so the path can be placed
+ * inside a quoted string of a line directive. All other characters are
+ * written unchanged. */
+void escapeLineDirectivePath( std::ostream &out, char *path )
+{
+    char *cursor = path;
+    while ( *cursor != 0 ) {
+        const char ch = *cursor++;
+        if ( ch == '\\' )
+            out << "\\\\";
+        else
+            out << ch;
+    }
+}
+
+/* Invoked by the parser, after the source file name is taken from the XML
+ * file. Chooses the output file name when none was given, then points
+ * outStream at that file or at standard output.
+ *
+ * inputFile is the name of the source file named in the XML input.
+ *
+ * Exits with status 1 when the output file cannot be opened, and also when
+ * the output file name equals inputFile. The second case used to report the
+ * error and then carry on to open the file with truncation, destroying the
+ * source file. */
+void openOutput( char *inputFile )
+{
+    /* If the output format is code and no output file name is given, then
+     * make a default. */
+    if ( outputFormat == OutCode && outputFileName == 0 ) {
+        char *ext = findFileExtension( inputFile );
+        if ( ext != 0 && strcmp( ext, ".rh" ) == 0 )
+            outputFileName = fileNameFromStem( inputFile, ".h" );
+        else {
+            /* The extension follows the host language. */
+            char *defExtension = 0;
+            switch ( hostLangType ) {
+                case CCode: defExtension = ".c"; break;
+                case DCode: defExtension = ".d"; break;
+                case JavaCode: defExtension = ".java"; break;
+            }
+            outputFileName = fileNameFromStem( inputFile, defExtension );
+        }
+    }
+
+    /* Make sure we are not writing to the same file as the input file.
+     * Opening it below would truncate it, so stop here. */
+    if ( outputFileName != 0 && strcmp( inputFile, outputFileName ) == 0 ) {
+        error() << "output file \"" << outputFileName <<
+                "\" is the same as the input file" << endl;
+        exit(1);
+    }
+
+    if ( outputFileName != 0 ) {
+        /* Create the filter on the output and open it. */
+        outFilter = new output_filter;
+        outFilter->open( outputFileName, ios::out|ios::trunc );
+        if ( !outFilter->is_open() ) {
+            error() << "error opening " << outputFileName << " for writing" << endl;
+            exit(1);
+        }
+
+        /* Open the output stream, attaching it to the filter. */
+        outStream = new ostream( outFilter );
+    }
+    else {
+        /* Writing out to std out. */
+        outStream = &cout;
+    }
+}
+
+/* Main: process the command line, open the XML input, and call xml_parse to
+ * do the work.
+ *
+ * Option errors are collected through error() and cause an exit with status
+ * 1 once the whole command line has been seen; malformed -T/-F/-G/-P
+ * arguments exit immediately. After parsing, any output file that was opened
+ * is closed, and removed again if errors were recorded.
+ *
+ * Returns 0 on success; exits with status 1 on any error. */
+int main(int argc, char **argv)
+{
+	ParamCheck pc("o:VpT:F:G:vHh?-:P:", argc, argv);
+	char *xmlInputFileName = 0;
+
+	while ( pc.check() ) {
+		switch ( pc.state ) {
+		case ParamCheck::match:
+			switch ( pc.parameter ) {
+			/* Output. */
+			case 'o':
+				if ( *pc.parameterArg == 0 )
+					error() << "a zero length output file name was given" << endl;
+				else if ( outputFileName != 0 )
+					error() << "more than one output file name was given" << endl;
+				else {
+					/* Ok, remember the output file name. */
+					outputFileName = pc.parameterArg;
+				}
+				break;
+
+			/* Output formats. */
+			case 'V':
+				outputFormat = OutGraphvizDot;
+				break;
+
+			case 'p':
+				printPrintables = true;
+				break;
+
+			/* Code style. */
+			case 'T':
+				if ( pc.parameterArg[0] == '0' )
+					codeStyle = GenTables;
+				else if ( pc.parameterArg[0] == '1' )
+					codeStyle = GenFTables;
+				else {
+					error() << "-T" << pc.parameterArg[0] <<
+							" is an invalid argument" << endl;
+					exit(1);
+				}
+				break;
+			case 'F':
+				if ( pc.parameterArg[0] == '0' )
+					codeStyle = GenFlat;
+				else if ( pc.parameterArg[0] == '1' )
+					codeStyle = GenFFlat;
+				else {
+					error() << "-F" << pc.parameterArg[0] <<
+							" is an invalid argument" << endl;
+					exit(1);
+				}
+				break;
+			case 'G':
+				if ( pc.parameterArg[0] == '0' )
+					codeStyle = GenGoto;
+				else if ( pc.parameterArg[0] == '1' )
+					codeStyle = GenFGoto;
+				else if ( pc.parameterArg[0] == '2' )
+					codeStyle = GenIpGoto;
+				else {
+					error() << "-G" << pc.parameterArg[0] <<
+							" is an invalid argument" << endl;
+					exit(1);
+				}
+				break;
+			case 'P':
+				codeStyle = GenSplit;
+				numSplitPartitions = atoi( pc.parameterArg );
+				/* atoi yields 0 for non-numeric text. A partition count
+				 * below one cannot be honoured, so reject it here the same
+				 * way the other code style options reject bad arguments. */
+				if ( numSplitPartitions < 1 ) {
+					error() << "-P" << pc.parameterArg <<
+							" is an invalid argument" << endl;
+					exit(1);
+				}
+				break;
+
+			/* Version and help. */
+			case 'v':
+				version();
+				exit(0);
+			case 'H': case 'h': case '?':
+				usage();
+				exit(0);
+			case '-':
+				if ( strcasecmp(pc.parameterArg, "help") == 0 ) {
+					usage();
+					exit(0);
+				}
+				else if ( strcasecmp(pc.parameterArg, "version") == 0 ) {
+					version();
+					exit(0);
+				}
+				else {
+					error() << "--" << pc.parameterArg <<
+							" is an invalid argument" << endl;
+					break;
+				}
+			}
+			break;
+
+		case ParamCheck::invalid:
+			error() << "-" << pc.parameter << " is an invalid argument" << endl;
+			break;
+
+		case ParamCheck::noparam:
+			if ( *pc.curArg == 0 )
+				error() << "a zero length input file name was given" << endl;
+			else if ( xmlInputFileName != 0 )
+				error() << "more than one input file name was given" << endl;
+			else {
+				/* OK, Remember the filename. */
+				xmlInputFileName = pc.curArg;
+			}
+			break;
+		}
+	}
+
+	/* Bail on above errors. */
+	if ( gblErrorCount > 0 )
+		exit(1);
+
+	/* Open the input file for reading. */
+	if ( xmlInputFileName != 0 ) {
+		/* Open the input file for reading. */
+		ifstream *inFile = new ifstream( xmlInputFileName );
+		inStream = inFile;
+		if ( ! inFile->is_open() )
+			error() << "could not open " << xmlInputFileName << " for reading" << endl;
+	}
+	else {
+		/* No input file named: read from standard input. */
+		xmlInputFileName = "<stdin>";
+		inStream = &cin;
+	}
+
+	/* Bail on above errors. */
+	if ( gblErrorCount > 0 )
+		exit(1);
+
+	/* Parse the input! */
+	xml_parse( *inStream, xmlInputFileName );
+
+	/* If writing to a file, delete the ostream, causing it to flush.
+	 * Standard out is flushed automatically. */
+	if ( outputFileName != 0 ) {
+		delete outStream;
+		delete outFilter;
+	}
+
+	/* Finished, final check for errors.. */
+	if ( gblErrorCount > 0 ) {
+		/* If we opened an output file, remove it. */
+		if ( outputFileName != 0 )
+			unlink( outputFileName );
+		exit(1);
+	}
+	return 0;
+}
diff --git a/rlcodegen/redfsm.cpp b/rlcodegen/redfsm.cpp
new file mode 100644
index 0000000..ffcc207
--- /dev/null
+++ b/rlcodegen/redfsm.cpp
@@ -0,0 +1,535 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "redfsm.h"
+#include "avlmap.h"
+#include <iostream>
+#include <sstream>
+
+using std::ostringstream;
+
+KeyOps *keyOps = 0;
+
+/* Gives a printable identifier for the action: its name when it has one,
+ * otherwise its source position formatted as "line:col". */
+string Action::nameOrLoc()
+{
+	if ( name == 0 ) {
+		/* Unnamed action: fall back on where it was defined. */
+		ostringstream text;
+		text << loc.line << ":" << loc.col;
+		return text.str();
+	}
+	return string(name);
+}
+
+/* Start with an empty machine: all counters at zero and all state/transition
+ * pointers null. The initializers follow the member declaration order and
+ * cover every scalar and pointer member so none is left indeterminate. */
+RedFsmAp::RedFsmAp()
+:
+	wantComplete(false),
+	forcedErrorState(false),
+	nextActionId(0),
+	nextTransId(0),
+	nextStateId(0),
+	startState(0),
+	errState(0),
+	errTrans(0),
+	errActionTrans(0),
+	firstFinState(0),
+	numFinStates(0),
+	nParts(0)
+{
+}
+
+/* Recursive step of the depth first ordering: appends state to the state
+ * list if it is not there yet, then visits the target of each of its range
+ * transitions. */
+void RedFsmAp::depthFirstOrdering( RedStateAp *state )
+{
+	/* A state already on the list has been visited. */
+	if ( state->onStateList )
+		return;
+
+	/* Claim the state before descending so cycles terminate. */
+	state->onStateList = true;
+	stateList.append( state );
+
+	/* Singles and the default must not have been chosen yet. */
+	assert( state->outSingle.length() == 0 );
+	assert( state->defTrans == 0 );
+
+	/* Descend into every range that has a target state. */
+	for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) {
+		RedStateAp *target = rtel->value->targ;
+		if ( target == 0 )
+			continue;
+		depthFirstOrdering( target );
+	}
+}
+
+/* Rebuilds the state list in depth first order, walking from the start
+ * state, then every entry point, then the error state when one is forced.
+ * Asserts that the rebuilt list has as many states as the old one. */
+void RedFsmAp::depthFirstOrdering()
+{
+	/* Remember how many states there are so the rebuild can be checked. */
+	int expectedLen = stateList.length();
+
+	/* Mark every state as not yet placed. */
+	for ( RedStateList::Iter st = stateList; st.lte(); st++ )
+		st->onStateList = false;
+
+	/* Drop the old ordering; the states themselves are re-appended below. */
+	stateList.abandon();
+
+	/* Roots of the traversal, in order. */
+	depthFirstOrdering( startState );
+	for ( RedStateSet::Iter en = entryPoints; en.lte(); en++ )
+		depthFirstOrdering( *en );
+	if ( forcedErrorState )
+		depthFirstOrdering( errState );
+
+	/* Nothing may have been lost along the way. */
+	assert( expectedLen == stateList.length() );
+}
+
+/* Numbers the states 0, 1, 2, ... in state list order and leaves nextStateId
+ * equal to the number of ids handed out. */
+void RedFsmAp::sequentialStateIds()
+{
+	/* Table based machines depend on the state numbers starting at zero. */
+	int assigned = 0;
+	for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+		st->id = assigned;
+		assigned += 1;
+	}
+	nextStateId = assigned;
+}
+
+/* Stable partition of the state list: non-final states keep their relative
+ * order at the front, final states keep theirs at the back. */
+void RedFsmAp::sortStatesByFinal()
+{
+	/* The walk stops at the element that was the tail on entry, so states
+	 * re-appended during the walk are never visited a second time. */
+	RedStateAp *const stop = stateList.tail;
+	RedStateAp *upcoming = stateList.head;
+	RedStateAp *cur = 0;
+
+	while ( cur != stop ) {
+		/* Step, fetching the successor before the node can be moved. */
+		cur = upcoming;
+		upcoming = cur->next;
+
+		if ( !cur->isFinal )
+			continue;
+
+		/* Final state: relocate it to the end of the list. */
+		stateList.detach( cur );
+		stateList.append( cur );
+	}
+}
+
+/* Assigns state ids so that all non-final states are numbered before any
+ * final state, without reordering the state list itself. */
+void RedFsmAp::sortStateIdsByFinal()
+{
+	/* Table based machines depend on this starting at zero. */
+	nextStateId = 0;
+
+	/* Two sweeps over the list: non-final states first, final states next. */
+	for ( int sweep = 0; sweep < 2; sweep++ ) {
+		const bool numberFinals = ( sweep == 1 );
+		for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+			if ( st->isFinal == numberFinals )
+				st->id = nextStateId++;
+		}
+	}
+}
+
+/* Points firstFinState at the final state with the lowest id. A value
+ * already stored in firstFinState takes part in the comparison. */
+void RedFsmAp::findFirstFinState()
+{
+	for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+		if ( !st->isFinal )
+			continue;
+
+		/* Keep the current candidate unless this one has a lower id. */
+		if ( firstFinState != 0 && st->id >= firstFinState->id )
+			continue;
+
+		firstFinState = st;
+	}
+}
+
+/* Gives each entry of the action map a location: a running offset that
+ * grows by the length of the entry's key plus one per entry. */
+void RedFsmAp::assignActionLocs()
+{
+	int cursor = 0;
+	for ( ActionTableMap::Iter act = actionMap; act.lte(); act++ ) {
+		act->location = cursor;
+
+		/* Step past this entry's array and a null terminator. */
+		cursor += act->key.length() + 1;
+	}
+}
+
+/* Check if we can extend the current range by displacing any ranges
+ * ahead to the singles.
+ *
+ *   list: the transition ranges of one state.
+ *   pos:  index of the range we would like to extend.
+ *
+ * Returns true when, looking ahead over ranges that follow one another
+ * without a gap, a range with the same transition as list[pos] is reached
+ * and every range passed over on the way has a span of exactly one. */
+bool RedFsmAp::canExtend( const RedTransList &list, int pos )
+{
+ /* Get the transition that we want to extend. */
+ RedTransAp *extendTrans = list[pos].value;
+
+ /* Look ahead in the transition list. Both indices advance together, so
+ * inside the loop pos is always the element just before next. */
+ for ( int next = pos + 1; next < list.length(); pos++, next++ ) {
+ /* If they are not continuous then cannot extend. */
+ Key nextKey = list[next].lowKey;
+ nextKey.decrement();
+ if ( list[pos].highKey != nextKey )
+ break;
+
+ /* Check for the extension property. */
+ if ( extendTrans == list[next].value )
+ return true;
+
+ /* If the span of the next element is more than one, then don't keep
+ * checking, it won't be moved to single. */
+ unsigned long long nextSpan = keyOps->span( list[next].lowKey, list[next].highKey );
+ if ( nextSpan > 1 )
+ break;
+ }
+ return false;
+}
+
+/* Move ranges to the singles list. Works through state->outRange in place:
+ * a range that canExtend() accepts absorbs the matching range ahead of it,
+ * with the ranges in between going to state->outSingle; any other range of
+ * span one goes to outSingle; everything else stays in outRange. */
+void RedFsmAp::moveTransToSingle( RedStateAp *state )
+{
+ RedTransList &range = state->outRange;
+ RedTransList &single = state->outSingle;
+ /* rpos only advances in the final else branch; after an extension or a
+ * removal the same index is examined again. */
+ for ( int rpos = 0; rpos < range.length(); ) {
+ /* Check if this is a range we can extend. */
+ if ( canExtend( range, rpos ) ) {
+ /* Transfer singles over. Each removal shifts the following range
+ * into rpos+1, so the loop ends at the range with the matching
+ * transition. */
+ while ( range[rpos].value != range[rpos+1].value ) {
+ /* Transfer the range to single. */
+ single.append( range[rpos+1] );
+ range.remove( rpos+1 );
+ }
+
+ /* Extend. */
+ range[rpos].highKey = range[rpos+1].highKey;
+ range.remove( rpos+1 );
+ }
+ /* Maybe move it to the singles. */
+ else if ( keyOps->span( range[rpos].lowKey, range[rpos].highKey ) == 1 ) {
+ single.append( range[rpos] );
+ range.remove( rpos );
+ }
+ else {
+ /* Keeping it in the ranges. */
+ rpos += 1;
+ }
+ }
+}
+
+/* Runs moveTransToSingle() on every state in the list, so that suitable
+ * single character transitions are taken out of the ranges. */
+void RedFsmAp::chooseSingle()
+{
+	RedStateList::Iter st = stateList;
+	while ( st.lte() ) {
+		/* Rewrite this state's transition list. */
+		moveTransToSingle( st );
+		st++;
+	}
+}
+
+/* For every state, builds flat lookup arrays from its condition list and
+ * from its out ranges. condList gets one CondSpace* slot per key between
+ * condLowKey and condHighKey; transList gets one RedTransAp* slot per key
+ * between lowKey and highKey. A state without conditions or without ranges
+ * gets zero keys for the corresponding pair; transList is also set to null
+ * when there are no ranges. The arrays are allocated with new[] and not
+ * released here. */
+void RedFsmAp::makeFlat()
+{
+ for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+ if ( st->stateCondList.length() == 0 ) {
+ st->condLowKey = 0;
+ st->condHighKey = 0;
+ }
+ else {
+ /* The array covers from the first condition's low key to the last
+ * condition's high key. */
+ st->condLowKey = st->stateCondList.head->lowKey;
+ st->condHighKey = st->stateCondList.tail->highKey;
+
+ unsigned long long span = keyOps->span( st->condLowKey, st->condHighKey );
+ st->condList = new CondSpace*[ span ];
+ memset( st->condList, 0, sizeof(CondSpace*)*span );
+
+ for ( StateCondList::Iter sci = st->stateCondList; sci.lte(); sci++ ) {
+ unsigned long long base, trSpan;
+ /* NOTE(review): presumably span() counts both end keys, so
+ * subtracting one gives a zero-based slot -- confirm. */
+ base = keyOps->span( st->condLowKey, sci->lowKey )-1;
+ trSpan = keyOps->span( sci->lowKey, sci->highKey );
+ for ( unsigned long long pos = 0; pos < trSpan; pos++ )
+ st->condList[base+pos] = sci->condSpace;
+ }
+ }
+
+ if ( st->outRange.length() == 0 ) {
+ st->lowKey = st->highKey = 0;
+ st->transList = 0;
+ }
+ else {
+ /* The array covers from the first range's low key to the last
+ * range's high key. */
+ st->lowKey = st->outRange[0].lowKey;
+ st->highKey = st->outRange[st->outRange.length()-1].highKey;
+ unsigned long long span = keyOps->span( st->lowKey, st->highKey );
+ st->transList = new RedTransAp*[ span ];
+ memset( st->transList, 0, sizeof(RedTransAp*)*span );
+
+ for ( RedTransList::Iter trans = st->outRange; trans.lte(); trans++ ) {
+ unsigned long long base, trSpan;
+ base = keyOps->span( st->lowKey, trans->lowKey )-1;
+ trSpan = keyOps->span( trans->lowKey, trans->highKey );
+ for ( unsigned long long pos = 0; pos < trSpan; pos++ )
+ st->transList[base+pos] = trans->value;
+ }
+
+ /* Fill in the gaps with the default transition. */
+ for ( unsigned long long pos = 0; pos < span; pos++ ) {
+ if ( st->transList[pos] == 0 )
+ st->transList[pos] = st->defTrans;
+ }
+ }
+ }
+}
+
+
+/* Installs defTrans as the default transition of state. Every range in
+ * state->outRange that uses defTrans is dropped from the list; the remaining
+ * ranges keep their order. */
+void RedFsmAp::moveToDefault( RedTransAp *defTrans, RedStateAp *state )
+{
+	/* Gather the ranges that do not take the chosen default. */
+	RedTransList kept;
+	for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) {
+		if ( rtel->value == defTrans )
+			continue;
+		kept.append( *rtel );
+	}
+
+	/* Hand the new list over to the state, then let go of the local handle
+	 * so it is not cleaned up along with the local. */
+	state->outRange.shallowCopy( kept );
+	kept.abandon();
+
+	/* Record the default itself. */
+	state->defTrans = defTrans;
+}
+
+/* Is every key from keyOps->minKey to keyOps->maxKey covered by outRange?
+ * Returns true only when the list is non-empty, the first range starts no
+ * higher than minKey, each range begins one key past the end of the range
+ * before it, and the last range ends no lower than maxKey. */
+bool RedFsmAp::alphabetCovered( RedTransList &outRange )
+{
+ /* Cannot cover without any out ranges. */
+ if ( outRange.length() == 0 )
+ return false;
+
+ /* If the first range doesn't start at the lower bound then the
+ * alphabet is not covered. */
+ RedTransList::Iter rtel = outRange;
+ if ( keyOps->minKey < rtel->lowKey )
+ return false;
+
+ /* Check that every range is next to the previous one. The walk starts at
+ * the second range; rtel[-1] is used to reach the one before it. */
+ rtel.increment();
+ for ( ; rtel.lte(); rtel++ ) {
+ Key highKey = rtel[-1].highKey;
+ highKey.increment();
+ if ( highKey != rtel->lowKey )
+ return false;
+ }
+
+ /* The last must extend to the upper bound. */
+ RedTransEl *last = &outRange[outRange.length()-1];
+ if ( last->highKey < keyOps->maxKey )
+ return false;
+
+ return true;
+}
+
+/* Picks the transition that covers the most keys across state->outRange.
+ * On a tie the transition met first in set order wins. Returns null when the
+ * state has no ranges. */
+RedTransAp *RedFsmAp::chooseDefaultSpan( RedStateAp *state )
+{
+	/* The distinct transitions used by the ranges. */
+	RedTransSet distinct;
+	for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ )
+		distinct.insert( rtel->value );
+
+	/* One zero-initialized running total per distinct transition, indexed by
+	 * the transition's position in the set. */
+	unsigned long long *totals = new unsigned long long[distinct.length()]();
+	for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) {
+		RedTransAp **slot = distinct.find( rtel->value );
+		totals[slot - distinct.data] += keyOps->span( rtel->lowKey, rtel->highKey );
+	}
+
+	/* Strictly-greater comparison keeps the first of equal maxima. */
+	unsigned long long bestTotal = 0;
+	RedTransAp *best = 0;
+	for ( RedTransSet::Iter rtel = distinct; rtel.lte(); rtel++ ) {
+		const unsigned long long total = totals[rtel.pos()];
+		if ( total > bestTotal ) {
+			bestTotal = total;
+			best = *rtel;
+		}
+	}
+
+	delete[] totals;
+	return best;
+}
+
+/* For each state whose ranges cover the whole alphabet, picks the transition
+ * with the largest span and makes it the state's default. States that do not
+ * cover the alphabet are left alone. */
+void RedFsmAp::chooseDefaultSpan()
+{
+	for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+		/* Only pick a default when the alphabet is covered. This avoids any
+		 * transitions in the out range that go to error and avoids the need
+		 * for an ERR state. */
+		if ( !alphabetCovered( st->outRange ) )
+			continue;
+
+		/* Choose by largest span, then pull the choice out of the ranges
+		 * and store it as the default. */
+		RedTransAp *widest = chooseDefaultSpan( st );
+		moveToDefault( widest, st );
+	}
+}
+
+/* Looks for a range of state whose transition targets state->next, the state
+ * that follows it in the state list. Returns the first such transition, or
+ * null when there is none. */
+RedTransAp *RedFsmAp::chooseDefaultGoto( RedStateAp *state )
+{
+	for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) {
+		if ( rtel->value->targ == state->next )
+			return rtel->value;
+	}
+	return 0;
+}
+
+/* Gives every state a default transition: the one found by the per-state
+ * chooseDefaultGoto(), or failing that the one with the largest span. */
+void RedFsmAp::chooseDefaultGoto()
+{
+	for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+		/* First preference is the goto-oriented pick. */
+		RedTransAp *gotoPick = chooseDefaultGoto( st );
+
+		/* Take the picked transition out of the ranges and store it as the
+		 * default, falling back on the largest span when nothing was
+		 * picked. */
+		moveToDefault( gotoPick != 0 ? gotoPick : chooseDefaultSpan( st ), st );
+	}
+}
+
+/* Picks the transition used by the largest number of ranges in
+ * state->outRange. On a tie the transition met first in set order wins.
+ * Returns null when the state has no ranges. */
+RedTransAp *RedFsmAp::chooseDefaultNumRanges( RedStateAp *state )
+{
+	/* The distinct transitions used by the ranges. */
+	RedTransSet distinct;
+	for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ )
+		distinct.insert( rtel->value );
+
+	/* One zero-initialized tally per distinct transition, indexed by the
+	 * transition's position in the set. */
+	int *tallies = new int[distinct.length()]();
+	for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) {
+		RedTransAp **slot = distinct.find( rtel->value );
+		tallies[slot - distinct.data] += 1;
+	}
+
+	/* Strictly-greater comparison keeps the first of equal maxima. */
+	int bestTally = 0;
+	RedTransAp *best = 0;
+	for ( RedTransSet::Iter rtel = distinct; rtel.lte(); rtel++ ) {
+		const int tally = tallies[rtel.pos()];
+		if ( tally > bestTally ) {
+			bestTally = tally;
+			best = *rtel;
+		}
+	}
+
+	delete[] tallies;
+	return best;
+}
+
+/* Gives every state a default transition chosen by the number of ranges
+ * that use it. */
+void RedFsmAp::chooseDefaultNumRanges()
+{
+	RedStateList::Iter st = stateList;
+	while ( st.lte() ) {
+		/* Choose, then take the choice out of the ranges and store it as
+		 * the default. */
+		RedTransAp *mostUsed = chooseDefaultNumRanges( st );
+		moveToDefault( mostUsed, st );
+		st++;
+	}
+}
+
+RedTransAp *RedFsmAp::getErrorTrans( )
+{
+ /* If the error trans has not been made aready, make it. */
+ if ( errTrans == 0 ) {
+ /* This insert should always succeed since no transition created by
+ * the user can point to the error state. */
+ errTrans = new RedTransAp( getErrorState(), 0, nextTransId++ );
+ RedTransAp *inRes = transSet.insert( errTrans );
+ assert( inRes != 0 );
+ }
+ return errTrans;
+}
+
+RedStateAp *RedFsmAp::getErrorState()
+{
+ /* Check if we need to init the error trans. */
+ if ( errState == 0 ) {
+ errState = new RedStateAp();
+ stateList.append( errState );
+ }
+ return errState;
+}
+
+
+/* Returns the transition in the transition set that has the given target and
+ * action. When none is found, a new one is created with the next transition
+ * id, inserted into the set, and returned. */
+RedTransAp *RedFsmAp::allocateTrans( RedStateAp *targ, RedAction *action )
+{
+	/* Look up using a throwaway transition as the search key. */
+	RedTransAp probe( targ, action, 0 );
+	RedTransAp *found = transSet.find( &probe );
+	if ( found != 0 )
+		return found;
+
+	/* Not present yet: make the real one and register it. */
+	RedTransAp *fresh = new RedTransAp( targ, action, nextTransId++ );
+	transSet.insert( fresh );
+	return fresh;
+}
+
+/* Splits the state list into nparts consecutive partitions, recording the
+ * partition number in each state and the partition count in nParts. Sizes
+ * differ by at most one: the first (length % nparts) partitions get the
+ * extra state.
+ *
+ *   nparts: requested number of partitions. Values below one are treated as
+ *       one, since the division below cannot be done with them. */
+void RedFsmAp::partitionFsm( int nparts )
+{
+	/* Guard the division and modulo below against a zero or negative
+	 * partition count. */
+	if ( nparts < 1 )
+		nparts = 1;
+
+	/* At this point the states are ordered by a depth-first traversal. We
+	 * will allocate to partitions based on this ordering. */
+	this->nParts = nparts;
+	int partSize = stateList.length() / nparts;
+	int remainder = stateList.length() % nparts;
+	int numInPart = partSize;
+	int partition = 0;
+	if ( remainder-- > 0 )
+		numInPart += 1;
+	for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+		st->partition = partition;
+
+		/* Partition full: move on and work out the size of the next one. */
+		numInPart -= 1;
+		if ( numInPart == 0 ) {
+			partition += 1;
+			numInPart = partSize;
+			if ( remainder-- > 0 )
+				numInPart += 1;
+		}
+	}
+}
+
+/* Builds, for every state, the array of transitions that target it. Counts
+ * the incoming transitions per state, allocates each state's inTrans array
+ * to that size, then fills the arrays; numInTrans ends up as the number of
+ * entries stored. */
+void RedFsmAp::setInTrans()
+{
+	/* Count how many transitions arrive at each state. */
+	for ( TransApSet::Iter trans = transSet; trans.lte(); trans++ ) {
+		RedStateAp *dest = trans->targ;
+		dest->numInTrans += 1;
+	}
+
+	/* Size each array from its count, then zero the count so it can serve
+	 * as the fill position. */
+	for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+		st->inTrans = new RedTransAp*[st->numInTrans];
+		st->numInTrans = 0;
+	}
+
+	/* Drop each transition into the next free slot of its target. */
+	for ( TransApSet::Iter trans = transSet; trans.lte(); trans++ ) {
+		RedStateAp *dest = trans->targ;
+		dest->inTrans[dest->numInTrans++] = trans;
+	}
+}
diff --git a/rlcodegen/redfsm.h b/rlcodegen/redfsm.h
new file mode 100644
index 0000000..42df42e
--- /dev/null
+++ b/rlcodegen/redfsm.h
@@ -0,0 +1,474 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _REDFSM_H
+#define _REDFSM_H
+
+#include <assert.h>
+#include <string.h>
+#include <string>
+#include "common.h"
+#include "vector.h"
+#include "dlist.h"
+#include "compare.h"
+#include "bstmap.h"
+#include "bstset.h"
+#include "avlmap.h"
+#include "avltree.h"
+#include "avlbasic.h"
+#include "mergesort.h"
+#include "rlcodegen.h"
+#include "sbstmap.h"
+#include "sbstset.h"
+#include "sbsttable.h"
+
+#define TRANS_ERR_TRANS 0
+#define STATE_ERR_STATE 0
+#define FUNC_NO_FUNC 0
+
+using std::string;
+
+struct RedStateAp;
+struct InlineList;
+struct Action;
+
+/*
+ * Inline code tree
+ *
+ * One node of the tree. The constructor stores the location and type and
+ * zeroes every other data member; prev and next are not set by the
+ * constructor.
+ */
+struct InlineItem
+{
+ /* The kinds of node that can appear in the tree. */
+ enum Type
+ {
+ Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret,
+ PChar, Char, Hold, Exec, HoldTE, ExecTE, Curs, Targs, Entry,
+ LmSwitch, LmSetActId, LmSetTokEnd, LmGetTokEnd, LmInitTokStart,
+ LmInitAct, LmSetTokStart, SubAction, Break
+ };
+
+ InlineItem( const InputLoc &loc, Type type ) :
+ loc(loc), data(0), targId(0), targState(0),
+ lmId(0), children(0), offset(0),
+ handlesError(false), type(type) { }
+
+ InputLoc loc;
+ char *data;
+ int targId;
+ RedStateAp *targState;
+ int lmId;
+ /* Nested items, null when the constructor's default is untouched. */
+ InlineList *children;
+ int offset;
+ bool handlesError;
+ Type type;
+
+ /* List links. */
+ InlineItem *prev, *next;
+};
+
+/* Normally this would be a typedef, but that would entail including DList
+ * from ptreetypes, which should be just typedef forwards. */
+struct InlineList : public DList<InlineItem> { };
+
+/* Element in list of actions. Contains the string for the code to execute.
+ * The constructor zeroes the name, the inline list, the id and all reference
+ * counts; loc is not set by the constructor. */
+struct Action
+:
+ public DListEl<Action>
+{
+ Action( )
+ :
+ name(0),
+ inlineList(0),
+ actionId(0),
+ numTransRefs(0),
+ numToStateRefs(0),
+ numFromStateRefs(0),
+ numEofRefs(0)
+ {
+ }
+
+ /* Data collected during parse. */
+ InputLoc loc;
+ char *name;
+ InlineList *inlineList;
+ int actionId;
+
+ /* The name if there is one, otherwise the location as "line:col". */
+ string nameOrLoc();
+
+ /* Number of references in the final machine: the sum of the four
+ * counters below. */
+ int numRefs()
+ { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; }
+ int numTransRefs;
+ int numToStateRefs;
+ int numFromStateRefs;
+ int numEofRefs;
+};
+
+
+/* Forwards. */
+struct RedStateAp;
+struct StateAp;
+
+/* Transition Action Element: an int key paired with an Action pointer. */
+typedef SBstMapEl< int, Action* > ActionTableEl;
+
+/* Transition Action Table: a map from int to Action pointer, ordered by
+ * CmpOrd<int>. The setters are declared here and defined elsewhere. */
+struct ActionTable
+ : public SBstMap< int, Action*, CmpOrd<int> >
+{
+ void setAction( int ordering, Action *action );
+ void setActions( int *orderings, Action **actions, int nActs );
+ void setActions( const ActionTable &other );
+};
+
+/* Three-way ordering of whole action table elements: by key first, then by
+ * value when the keys are equal. */
+struct CmpActionTableEl
+{
+	/* Returns -1, 0 or 1 as action1 sorts before, equal to, or after
+	 * action2. */
+	static int compare( const ActionTableEl &action1,
+			const ActionTableEl &action2 )
+	{
+		/* Keys decide unless they are equal. */
+		if ( action1.key < action2.key )
+			return -1;
+		if ( action2.key < action1.key )
+			return 1;
+
+		/* Equal keys: the values break the tie. */
+		if ( action1.value < action2.value )
+			return -1;
+		if ( action2.value < action1.value )
+			return 1;
+
+		return 0;
+	}
+};
+
+/* Compare for ActionTable. */
+typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable;
+
+/* Set of states. */
+typedef BstSet<RedStateAp*> RedStateSet;
+typedef BstSet<int> IntSet;
+
+/* Reduced action: an action table together with the bookkeeping collected
+ * for it. The constructor zeroes every counter, id and flag so that no
+ * member starts out indeterminate. */
+struct RedAction
+:
+	public AvlTreeEl<RedAction>
+{
+	RedAction( )
+	:
+		key(),
+		actListId(0),
+		location(0),
+		eofRefs(0),
+		numTransRefs(0),
+		numToStateRefs(0),
+		numFromStateRefs(0),
+		numEofRefs(0),
+		bAnyNextStmt(false),
+		bAnyCurStateRef(false),
+		bAnyBreakStmt(false)
+	{ }
+
+	/* The action table this element is keyed on. */
+	const ActionTable &getKey()
+		{ return key; }
+
+	ActionTable key;
+	int actListId;
+	int location;
+	IntSet *eofRefs;
+
+	/* Number of references in the final machine: the sum of the four
+	 * counters below. Returned as an int, like Action::numRefs(), so the
+	 * count is not collapsed to true/false; callers that only test it for
+	 * non-zero behave as before. */
+	int numRefs()
+		{ return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; }
+	int numTransRefs;
+	int numToStateRefs;
+	int numFromStateRefs;
+	int numEofRefs;
+
+	/* Accessors for the flags below. */
+	bool anyNextStmt() { return bAnyNextStmt; }
+	bool anyCurStateRef() { return bAnyCurStateRef; }
+	bool anyBreakStmt() { return bAnyBreakStmt; }
+
+	bool bAnyNextStmt;
+	bool bAnyCurStateRef;
+	bool bAnyBreakStmt;
+};
+typedef AvlTree<RedAction, ActionTable, CmpActionTable> ActionTableMap;
+
+/* Reduced transition: a target state and an action, plus an id and two
+ * flags. */
+struct RedTransAp
+:
+	public AvlTreeEl<RedTransAp>
+{
+	/* partitionBoundary starts out false, matching RedStateAp, so it is
+	 * never read in an indeterminate state. labelNeeded starts out true. */
+	RedTransAp( RedStateAp *targ, RedAction *action, int id )
+		: targ(targ), action(action), id(id),
+		partitionBoundary(false), labelNeeded(true) { }
+
+	RedStateAp *targ;
+	RedAction *action;
+	int id;
+	bool partitionBoundary;
+	bool labelNeeded;
+};
+
+/* Ordering used for the final reduction of transitions. Two transitions are
+ * compared by target pointer and then by the pointer to the shared action
+ * table; the id and flags play no part. It is assumed that when this is used
+ * the action tables have been reduced. */
+struct CmpRedTransAp
+{
+	/* Returns -1, 0 or 1 as t1 sorts before, equal to, or after t2. */
+	static int compare( const RedTransAp &t1, const RedTransAp &t2 )
+	{
+		/* Target decides unless both point at the same state. */
+		if ( t1.targ < t2.targ )
+			return -1;
+		if ( t2.targ < t1.targ )
+			return 1;
+
+		/* Same target: the action pointer breaks the tie. */
+		if ( t1.action < t2.action )
+			return -1;
+		if ( t2.action < t1.action )
+			return 1;
+
+		return 0;
+	}
+};
+
+typedef AvlBasic<RedTransAp, CmpRedTransAp> TransApSet;
+
+/* Element in out range: a low key, a high key and the transition taken for
+ * keys in that range. */
+struct RedTransEl
+{
+ /* Constructors. */
+ RedTransEl( Key lowKey, Key highKey, RedTransAp *value )
+ : lowKey(lowKey), highKey(highKey), value(value) { }
+
+ Key lowKey, highKey;
+ RedTransAp *value;
+};
+
+typedef Vector<RedTransEl> RedTransList;
+typedef Vector<RedStateAp*> RedStateVect;
+
+typedef BstMapEl<RedStateAp*, unsigned long long> RedSpanMapEl;
+typedef BstMap<RedStateAp*, unsigned long long> RedSpanMap;
+
+/* Compare used by span map sort. Orders elements by descending value, so the
+ * largest span sorts first; keys are not looked at. */
+struct CmpRedSpanMapEl
+{
+	/* Returns -1 when smel1 has the larger value, 1 when it has the smaller
+	 * value, and 0 when the values are equal. */
+	static int compare( const RedSpanMapEl &smel1, const RedSpanMapEl &smel2 )
+	{
+		if ( smel2.value < smel1.value )
+			return -1;
+		if ( smel1.value < smel2.value )
+			return 1;
+		return 0;
+	}
+};
+
+/* Sorting state-span map entries by span. */
+typedef MergeSort<RedSpanMapEl, CmpRedSpanMapEl> RedSpanMapSort;
+
+/* Set of entry ids that go into this state. */
+typedef Vector<int> EntryIdVect;
+typedef Vector<char*> EntryNameVect;
+
+typedef Vector< Action* > CondSet;
+
+/* A condition: two keys and a set of actions, linked into a ConditionList.
+ * Both keys start at zero; the list links are not set by the constructor. */
+struct Condition
+{
+ Condition( )
+ : key(0), baseKey(0) {}
+
+ Key key;
+ Key baseKey;
+ CondSet condSet;
+
+ /* List links. */
+ Condition *next, *prev;
+};
+typedef DList<Condition> ConditionList;
+
+/* A condition space: a base key, a set of actions and an id, linked into a
+ * CondSpaceList. There is no constructor, so the id and list links hold
+ * whatever the creator assigns. */
+struct CondSpace
+{
+ Key baseKey;
+ CondSet condSet;
+ int condSpaceId;
+
+ /* List links. */
+ CondSpace *next, *prev;
+};
+typedef DList<CondSpace> CondSpaceList;
+
+/* Associates a key range of a state with a condition space. Linked into a
+ * StateCondList. */
+struct StateCond
+{
+ /* Bounds of the key range. */
+ Key lowKey;
+ Key highKey;
+
+ CondSpace *condSpace;
+
+ /* List links. */
+ StateCond *prev, *next;
+};
+typedef DList<StateCond> StateCondList;
+typedef Vector<StateCond*> StateCondVect;
+
+/* Reduced state. The constructor nulls the pointers, clears the flags and
+ * zeroes id and numInTrans. It does not set partition, the list links or the
+ * four Key members. */
+struct RedStateAp
+{
+ RedStateAp()
+ :
+ defTrans(0),
+ condList(0),
+ transList(0),
+ isFinal(false),
+ labelNeeded(false),
+ outNeeded(false),
+ onStateList(false),
+ toStateAction(0),
+ fromStateAction(0),
+ eofAction(0),
+ id(0),
+ bAnyRegCurStateRef(false),
+ partitionBoundary(false),
+ inTrans(0),
+ numInTrans(0)
+ { }
+
+ /* Transitions out. */
+ RedTransList outSingle;
+ RedTransList outRange;
+ RedTransAp *defTrans;
+
+ /* For flat conditions. */
+ Key condLowKey, condHighKey;
+ CondSpace **condList;
+
+ /* For flat keys. */
+ Key lowKey, highKey;
+ RedTransAp **transList;
+
+ /* The list of states that transitions from this state go to. */
+ RedStateVect targStates;
+
+ bool isFinal;
+ bool labelNeeded;
+ bool outNeeded;
+ /* Visited mark used while rebuilding the state list. */
+ bool onStateList;
+ RedAction *toStateAction;
+ RedAction *fromStateAction;
+ RedAction *eofAction;
+ int id;
+ StateCondList stateCondList;
+ StateCondVect stateCondVect;
+
+ /* Pointers for the list of states. */
+ RedStateAp *prev, *next;
+
+ bool anyRegCurStateRef() { return bAnyRegCurStateRef; }
+ bool bAnyRegCurStateRef;
+
+ /* Partition number; not initialized by the constructor. */
+ int partition;
+ bool partitionBoundary;
+
+ /* Array of transitions that target this state, and its length. */
+ RedTransAp **inTrans;
+ int numInTrans;
+};
+
+/* List of states. */
+typedef DList<RedStateAp> RedStateList;
+
+/* Set of reduced transitons. Comparison is by pointer. */
+typedef BstSet< RedTransAp*, CmpOrd<RedTransAp*> > RedTransSet;
+
+/* Next version of the fsm machine. Holds the sets of transitions and
+ * actions, the list of states, and the passes that reorder states, assign
+ * ids and choose single/default transitions. */
+struct RedFsmAp
+{
+ RedFsmAp();
+
+ bool wantComplete;
+ bool forcedErrorState;
+
+ int nextActionId;
+ int nextTransId;
+
+ /* Next State Id doubles as the total number of state ids. */
+ int nextStateId;
+
+ TransApSet transSet;
+ ActionTableMap actionMap;
+ RedStateList stateList;
+ RedStateSet entryPoints;
+ RedStateAp *startState;
+ RedStateAp *errState;
+ RedTransAp *errTrans;
+ RedTransAp *errActionTrans;
+ RedStateAp *firstFinState;
+ int numFinStates;
+ /* Number of partitions, as recorded by partitionFsm(). */
+ int nParts;
+
+ /* Is it possible to extend a range by bumping ranges that span only
+ * one character to the singles array. */
+ bool canExtend( const RedTransList &list, int pos );
+
+ /* Pick single transitions from the ranges. */
+ void moveTransToSingle( RedStateAp *state );
+ void chooseSingle();
+
+ /* Build the flat per-key lookup arrays of every state. */
+ void makeFlat();
+
+ /* Move a selected transition from ranges to default. */
+ void moveToDefault( RedTransAp *defTrans, RedStateAp *state );
+
+ /* Pick a default transition by largest span. */
+ RedTransAp *chooseDefaultSpan( RedStateAp *state );
+ void chooseDefaultSpan();
+
+ /* Pick a default transition by most number of ranges. */
+ RedTransAp *chooseDefaultNumRanges( RedStateAp *state );
+ void chooseDefaultNumRanges();
+
+ /* Pick a default transition tailored towards goto driven machine. */
+ RedTransAp *chooseDefaultGoto( RedStateAp *state );
+ void chooseDefaultGoto();
+
+ /* Ordering states by transition connections.
+ * NOTE(review): redfsm.cpp does not define these two -- confirm they are
+ * defined elsewhere or are leftovers. */
+ void optimizeStateOrdering( RedStateAp *state );
+ void optimizeStateOrdering();
+
+ /* Ordering states by a depth first walk of the transitions. */
+ void depthFirstOrdering( RedStateAp *state );
+ void depthFirstOrdering();
+
+ /* Set state ids. */
+ void sequentialStateIds();
+ void sortStateIdsByFinal();
+
+ /* Arrange states by final status. This is a stable sort. */
+ void sortStatesByFinal();
+
+ /* Locating the first final state. This is the final state with the lowest
+ * id. */
+ void findFirstFinState();
+
+ void assignActionLocs();
+
+ /* Get the error transition or error state, creating it on first use. */
+ RedTransAp *getErrorTrans();
+ RedStateAp *getErrorState();
+
+ /* Is every char in the alphabet covered? */
+ bool alphabetCovered( RedTransList &outRange );
+
+ /* Find or create the transition with this target and action. */
+ RedTransAp *allocateTrans( RedStateAp *targState, RedAction *actionTable );
+
+ /* Assign the states to nParts consecutive partitions. */
+ void partitionFsm( int nParts );
+
+ /* Build each state's array of incoming transitions. */
+ void setInTrans();
+};
+
+
+#endif /* _REDFSM_H */
diff --git a/rlcodegen/rlcodegen.h b/rlcodegen/rlcodegen.h
new file mode 100644
index 0000000..cc302ba
--- /dev/null
+++ b/rlcodegen/rlcodegen.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _RLCODEGEN_H
+#define _RLCODEGEN_H
+
+#include <stdio.h>
+#include <string.h>
+#include <iostream>
+#include <fstream>
+#include "avltree.h"
+#include "vector.h"
+#include "config.h"
+
+#define PROGNAME "rlcodegen"
+
+/* Target language. Going by the enumerator names this selects between
+ * generated source code and a Graphviz dot graph; the code that switches on
+ * the value is not part of this header. */
+enum OutputFormat
+{
+ OutCode,
+ OutGraphvizDot
+};
+
+/* Target output style. One enumerator per code generation style; presumably
+ * each one maps to a code generator class (GenSplit to the split code
+ * generator, for instance) -- confirm where the generator is selected. */
+enum CodeStyleEnum
+{
+ GenTables,
+ GenFTables,
+ GenFlat,
+ GenFFlat,
+ GenGoto,
+ GenFGoto,
+ GenIpGoto,
+ GenSplit
+};
+
+/* Filter on the output stream that keeps track of the number of lines
+ * output. It is a std::filebuf that redeclares sync() and xsputn(); their
+ * definitions are not in this header, so presumably that is where `line` is
+ * advanced -- confirm in the implementation. */
+class output_filter : public std::filebuf
+{
+public:
+ /* The line count starts at 1. */
+ output_filter() : line(1) { }
+
+ virtual int sync();
+ virtual std::streamsize xsputn(const char* s, std::streamsize n);
+
+ /* Line counter; public, so users of the filter can read it directly. */
+ int line;
+};
+
+extern OutputFormat outputFormat;
+extern CodeStyleEnum codeStyle;
+
+/* IO filenames and stream. */
+extern char *outputFileName;
+extern std::ostream *outStream;
+extern output_filter *outFilter;
+
+extern bool printPrintables;
+extern bool graphvizDone;
+
+int xml_parse( std::istream &input, char *fileName );
+
+extern int gblErrorCount;
+extern char machineMain[];
+
+extern int numSplitPartitions;
+
+/*
+ * Error reporting.
+ */
+
+/* Location in an input file, as a line/column pair. Whether the numbers are
+ * zero- or one-based is not established in this header. */
+struct InputLoc
+{
+ int line;
+ int col;
+};
+
+/* An attribute recorded as two pointer/length pairs, one for the id and one
+ * for the value. NOTE(review): the explicit lengths suggest the pointers
+ * refer into a buffer and are not NUL-terminated -- confirm against the
+ * code that fills this in. */
+struct AttrMarker
+{
+ char *id;
+ int idLen;
+ char *value;
+ int valueLen;
+};
+
+/* An attribute as an id/value pair of C strings. XMLTag::findAttr compares
+ * `id` with strcmp(), so it is expected to be NUL-terminated. */
+struct Attribute
+{
+ char *id;
+ char *value;
+};
+
+typedef Vector<AttrMarker> AttrMkList;
+typedef Vector<Attribute> AttrList;
+struct XMLTagHashPair;
+
+/* One XML tag, either an opening or a closing one. A new tag starts out
+ * with no content and no attribute list. */
+struct XMLTag
+{
+    enum TagType { Open, Close };
+
+    XMLTag( XMLTagHashPair *tagId, TagType type ) :
+        tagId(tagId), type(type),
+        content(0), attrList(0) {}
+
+    /* Look up an attribute by id with an exact string comparison. Returns
+     * the first matching attribute, or 0 if the tag has no attribute list
+     * or no attribute carries that id. */
+    Attribute *findAttr( char *id )
+    {
+        if ( attrList == 0 )
+            return 0;
+
+        for ( AttrList::Iter candidate = *attrList; candidate.lte(); candidate++ ) {
+            if ( strcmp( id, candidate->id ) == 0 )
+                return candidate;
+        }
+        return 0;
+    }
+
+    XMLTagHashPair *tagId;
+    TagType type;
+
+    /* Content is associated with closing tags. */
+    char *content;
+
+    /* Attribute lists are associated with opening tags. */
+    AttrList *attrList;
+};
+
+
+std::ostream &error();
+//std::ostream &error( const YYLTYPE &loc );
+std::ostream &error( const InputLoc &loc );
+std::ostream &error( int first_line, int first_column );
+std::ostream &warning( );
+std::ostream &warning( const InputLoc &loc );
+std::ostream &warning( int first_line, int first_column );
+std::ostream &xml_error( const InputLoc &loc );
+//std::ostream &xml_error( const YYLTYPE &loc );
+
+
+
+void openOutput( char *inputFile );
+char *fileNameFromStem( char *stemFile, char *suffix );
+
+/* Size of the include stack. */
+#define INCLUDE_STACK_SIZE 32
+
+#endif /* _RLCODEGEN_H */
diff --git a/rlcodegen/splitcodegen.cpp b/rlcodegen/splitcodegen.cpp
new file mode 100644
index 0000000..48519ba
--- /dev/null
+++ b/rlcodegen/splitcodegen.cpp
@@ -0,0 +1,518 @@
+/*
+ * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include "rlcodegen.h"
+#include "splitcodegen.h"
+#include "gendata.h"
+#include <assert.h>
+
+using std::ostream;
+using std::ios;
+using std::endl;
+
+/* Emit the goto to take for a given transition. A target inside the
+ * partition currently being written is reached through the "tr"/"st"
+ * labels. A target in another partition is reached through the "ptr"/"pst"
+ * labels, and the transition (or, without an action, the target state) is
+ * flagged as a partition boundary. */
+std::ostream &SplitCodeGen::TRANS_GOTO( RedTransAp *trans, int level )
+{
+    const bool hasAction = trans->action != 0;
+
+    if ( trans->targ->partition != currentPartition ) {
+        if ( hasAction ) {
+            /* Leave the partition by way of the transition's action. */
+            out << TABS(level) << "goto ptr" << trans->id << ";";
+            trans->partitionBoundary = true;
+        }
+        else {
+            /* Leave the partition straight to the target state. */
+            out << TABS(level) << "goto pst" << trans->targ->id << ";";
+            trans->targ->partitionBoundary = true;
+        }
+        return out;
+    }
+
+    if ( hasAction ) {
+        /* Go to the transition, which will go on to the state. */
+        out << TABS(level) << "goto tr" << trans->id << ";";
+    }
+    else {
+        /* Go directly to the target state. */
+        out << TABS(level) << "goto st" << trans->targ->id << ";";
+    }
+    return out;
+}
+
+/* Called before writing the gotos for each state. Writes, in this order:
+ * whatever IN_TRANS_ACTIONS produces, the "st<id>:" label, the to-state
+ * actions, the advance of the buffer position, the "case <id>:" label, the
+ * from-state actions, a line directive if any action was written, and the
+ * "_ps" assignment. The stateInPartition argument is not used here. */
+void SplitCodeGen::GOTO_HEADER( RedStateAp *state, bool stateInPartition )
+{
+ /* Presumably true when IN_TRANS_ACTIONS wrote action code. */
+ bool anyWritten = IN_TRANS_ACTIONS( state );
+
+ if ( state->labelNeeded )
+ out << "st" << state->id << ":\n";
+
+ if ( state->toStateAction != 0 ) {
+ /* Remember that we wrote an action. Write every action in the list. */
+ anyWritten = true;
+ for ( ActionTable::Iter item = state->toStateAction->key; item.lte(); item++ )
+ ACTION( out, item->value, state->id, false );
+ }
+
+ /* Advance and test buffer pos. This is only written for states that are
+ * entered through their label. */
+ if ( state->labelNeeded ) {
+ if ( cgd->hasEnd ) {
+ out <<
+ " if ( ++" << P() << " == " << PE() << " )\n"
+ " goto _out" << state->id << ";\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n";
+ }
+ }
+
+ /* Give the state a switch case. */
+ out << "case " << state->id << ":\n";
+
+ if ( state->fromStateAction != 0 ) {
+ /* Remember that we wrote an action. Write every action in the list. */
+ anyWritten = true;
+ for ( ActionTable::Iter item = state->fromStateAction->key; item.lte(); item++ )
+ ACTION( out, item->value, state->id, false );
+ }
+
+ /* Action code was written, so emit a line directive after it. */
+ if ( anyWritten )
+ genLineDirective( out );
+
+ /* Record the prev state if necessary. */
+ if ( state->anyRegCurStateRef() )
+ out << " _ps = " << state->id << ";\n";
+}
+
+/* Write the transition code of every state that belongs to the given
+ * partition. States of other partitions are skipped. */
+std::ostream &SplitCodeGen::STATE_GOTOS( int partition )
+{
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        if ( st->partition != partition )
+            continue;
+
+        if ( st == redFsm->errState ) {
+            STATE_GOTO_ERROR();
+            continue;
+        }
+
+        /* The goto base class calls back into us through virtual
+         * functions. Record the partition in a member instead of passing
+         * it through every call. */
+        currentPartition = partition;
+
+        /* Code that goes above the state's gotos. The state is known to be
+         * in the partition at this point. */
+        GOTO_HEADER( st, true );
+
+        if ( st->stateCondVect.length() > 0 ) {
+            out << " _widec = " << GET_KEY() << ";\n";
+            emitCondBSearch( st, 1, 0, st->stateCondVect.length() - 1 );
+        }
+
+        /* Single keys are tried first. */
+        if ( st->outSingle.length() > 0 )
+            emitSingleSwitch( st );
+
+        /* Then a binary search over the ranges. */
+        if ( st->outRange.length() > 0 )
+            emitRangeBSearch( st, 1, 0, st->outRange.length() - 1 );
+
+        /* When nothing above matched, take the default transition. */
+        TRANS_GOTO( st->defTrans, 1 ) << "\n";
+    }
+    return out;
+}
+
+
+/* Write the "ptr" and "pst" labels for every transition and state flagged
+ * as a partition boundary. A "ptr" label runs the transition's actions and
+ * jumps to the target's "pst" label; a "pst" label stores the state id in
+ * the current-state variable, runs the to-state actions and jumps to
+ * _pt_out. The partition argument is not used. */
+std::ostream &SplitCodeGen::PART_TRANS( int partition )
+{
+    for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) {
+        if ( !trans->partitionBoundary )
+            continue;
+
+        out <<
+            "ptr" << trans->id << ":\n";
+
+        if ( trans->action != 0 ) {
+            /* An action containing a next statement may or may not set the
+             * current state, so load it beforehand. */
+            if ( trans->action->anyNextStmt() )
+                out << " " << CS() << " = " << trans->targ->id << ";\n";
+
+            /* Write each action in the list. */
+            for ( ActionTable::Iter act = trans->action->key; act.lte(); act++ )
+                ACTION( out, act->value, trans->targ->id, false );
+        }
+
+        out <<
+            " goto pst" << trans->targ->id << ";\n";
+
+        /* The target's pst label is used now, so it must be written too. */
+        trans->targ->partitionBoundary = true;
+    }
+
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        if ( !st->partitionBoundary )
+            continue;
+
+        out <<
+            " pst" << st->id << ":\n"
+            " " << CS() << " = " << st->id << ";\n";
+
+        if ( st->toStateAction != 0 ) {
+            /* Write every to-state action, then a line directive. */
+            for ( ActionTable::Iter act = st->toStateAction->key; act.lte(); act++ )
+                ACTION( out, act->value, st->id, false );
+            genLineDirective( out );
+        }
+
+        ptOutLabelUsed = true;
+        out << " goto _pt_out; \n";
+    }
+    return out;
+}
+
+/* Write an "_out<id>" label for each state of the partition that has
+ * outNeeded set. The label stores the state id in the current-state
+ * variable and jumps to _out. */
+std::ostream &SplitCodeGen::EXIT_STATES( int partition )
+{
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        if ( st->partition != partition )
+            continue;
+        if ( !st->outNeeded )
+            continue;
+
+        outLabelUsed = true;
+        out << " _out" << st->id << ": ";
+        out << CS() << " = " << st->id << "; goto _out; \n";
+    }
+    return out;
+}
+
+
+/* Write the body of one partition function: the local declarations, the
+ * optional _again/_resume entry code, the switch over the current state
+ * holding this partition's states, the partition boundary labels, the exit
+ * labels, and finally the _out (returns 0) and _pt_out (returns 1) blocks
+ * when they are referenced. */
+std::ostream &SplitCodeGen::PARTITION( int partition )
+{
+ outLabelUsed = false;
+ ptOutLabelUsed = false;
+
+ /* Initialize the partition boundaries, which get set during the writing
+ * of states. After the states are written, PART_TRANS writes a label for
+ * every transition and state that was marked. */
+ for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ )
+ trans->partitionBoundary = false;
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+ st->partitionBoundary = false;
+
+ /* Local copies of the buffer pointers passed in by the caller. */
+ out << " " << ALPH_TYPE() << " *p = *_pp, *pe = *_ppe;\n";
+
+ if ( anyRegCurStateRef() )
+ out << " int _ps = 0;\n";
+
+ if ( anyConditions() )
+ out << " " << WIDE_ALPH_TYPE() << " _widec;\n";
+
+ if ( useAgainLabel() ) {
+ out <<
+ " goto _resume;\n"
+ "\n"
+ "_again:\n"
+ " switch ( " << CS() << " ) {\n";
+ AGAIN_CASES() <<
+ " default: break;\n"
+ " }\n"
+ "\n";
+
+
+ if ( cgd->hasEnd ) {
+ outLabelUsed = true;
+ out <<
+ " if ( ++" << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n";
+ }
+
+ out <<
+ "_resume:\n";
+ }
+
+ /* The states come first: writing them is what marks the partition
+ * boundaries and exit labels that the calls below write out. */
+ out <<
+ " switch ( " << CS() << " )\n {\n";
+ STATE_GOTOS( partition );
+ SWITCH_DEFAULT() <<
+ " }\n";
+ PART_TRANS( partition );
+ EXIT_STATES( partition );
+
+ /* Exit that stays out of the partitions: write the pointers back and
+ * return 0. */
+ if ( outLabelUsed ) {
+ out <<
+ "\n"
+ " _out:\n"
+ " *_pp = p;\n"
+ " *_ppe = pe;\n"
+ " return 0;\n";
+ }
+
+ /* Exit across a partition boundary: write the pointers back and
+ * return 1. */
+ if ( ptOutLabelUsed ) {
+ out <<
+ "\n"
+ " _pt_out:\n"
+ " *_pp = p;\n"
+ " *_ppe = pe;\n"
+ " return 1;\n";
+ }
+
+ return out;
+}
+
+/* Write the partition number of every state as a comma separated list,
+ * ordered by state id, with a line break after every IALL entries. State
+ * ids are used directly as array indices. */
+std::ostream &SplitCodeGen::PART_MAP()
+{
+    const int numStates = redFsm->stateList.length();
+
+    /* Collect the partition numbers, indexed by state id. */
+    int *partOfState = new int[numStates];
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+        partOfState[st->id] = st->partition;
+
+    out << "\t";
+    int numSeparators = 0;
+    for ( int id = 0; id < numStates; id++ ) {
+        out << partOfState[id];
+
+        /* No separator after the final entry. */
+        if ( id == numStates - 1 )
+            continue;
+
+        out << ", ";
+        numSeparators += 1;
+        if ( numSeparators % IALL == 0 )
+            out << "\n\t";
+    }
+
+    delete[] partOfState;
+    return out;
+}
+
+/* Write the static data of the machine: the start state constant, the
+ * optional first-final and error state constants, the array mapping states
+ * to partitions, and a prototype for each partition function. */
+void SplitCodeGen::writeOutData()
+{
+ out <<
+ "static const int " << START() << " = " << START_STATE_ID() << ";\n"
+ "\n";
+
+ if ( cgd->writeFirstFinal ) {
+ out <<
+ "static const int " << FIRST_FINAL() << " = " << FIRST_FINAL_STATE() << ";\n"
+ "\n";
+ }
+
+ if ( cgd->writeErr ) {
+ out <<
+ "static const int " << ERROR() << " = " << ERROR_STATE() << ";\n"
+ "\n";
+ }
+
+
+ /* The partition map. Its element type is chosen from the global
+ * numSplitPartitions. */
+ OPEN_ARRAY( ARRAY_TYPE(numSplitPartitions), PM() );
+ PART_MAP();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ /* One prototype per partition; ALL_PARTITIONS writes the definitions
+ * with the same signature. */
+ for ( int p = 0; p < redFsm->nParts; p++ ) {
+ out << "int partition" << p << "( " << ALPH_TYPE() << " **_pp, " << ALPH_TYPE() <<
+ " **_ppe, struct " << FSM_NAME() << " *fsm );\n";
+ }
+ out << "\n";
+}
+
+/* Write every partition function into a source file of its own. The file
+ * names are built from cgd->fileName with a "_<number>.c" suffix, the number
+ * zero-padded to the width of the highest partition number. While a
+ * partition is being written the output stream is pointed at that file;
+ * afterwards its previous stream buffer is restored. Exits the program if a
+ * partition file cannot be opened. */
+std::ostream &SplitCodeGen::ALL_PARTITIONS()
+{
+    /* Compute the format string. */
+    int width = 0, high = redFsm->nParts - 1;
+    while ( high > 0 ) {
+        width++;
+        high /= 10;
+    }
+    assert( width <= 8 );
+    char suffFormat[] = "_%6.6d.c";
+    suffFormat[2] = suffFormat[4] = ( '0' + width );
+
+    for ( int p = 0; p < redFsm->nParts; p++ ) {
+        /* The widest suffix the assert above lets through is '_', eight
+         * digits, ".c" and the terminator: 12 bytes. */
+        char suffix[16];
+        sprintf( suffix, suffFormat, p );
+
+        /* NOTE(review): who owns the strings returned by fileNameFromStem
+         * is not visible here, so they are not freed. */
+        char *fn = fileNameFromStem( cgd->fileName, suffix );
+        char *include = fileNameFromStem( cgd->fileName, ".h" );
+
+        /* Create the filter on the output and open it. The check has to be
+         * made on this partition's filter, not on the global outFilter. */
+        output_filter *partFilter = new output_filter;
+        partFilter->open( fn, ios::out|ios::trunc );
+        if ( !partFilter->is_open() ) {
+            error() << "error opening " << fn << " for writing" << endl;
+            exit(1);
+        }
+
+        /* Attach the new file to the output stream. */
+        std::streambuf *prev_rdbuf = out.rdbuf( partFilter );
+
+        out <<
+            "#include \"" << include << "\"\n"
+            "int partition" << p << "( " << ALPH_TYPE() << " **_pp, " << ALPH_TYPE() <<
+            " **_ppe, struct " << FSM_NAME() << " *fsm )\n"
+            "{\n";
+        PARTITION( p ) <<
+            "}\n\n";
+        out.flush();
+
+        /* Fix the output stream. */
+        out.rdbuf( prev_rdbuf );
+
+        /* The stream no longer refers to the filter. Destroying it closes
+         * the partition file. */
+        delete partFilter;
+    }
+    return out;
+}
+
+
+/* Write the execution code that drives the partitions: a switch on the
+ * partition of the current state calls the matching partition function, and
+ * a nonzero return value sends control back to _reenter. The partition
+ * files themselves are written at the end, through ALL_PARTITIONS. Note
+ * that the generated call passes the fixed names p, pe and fsm. */
+void SplitCodeGen::writeOutExec()
+{
+ out <<
+ " {\n"
+ " int _stat = 0;\n";
+
+ if ( cgd->hasEnd ) {
+ out <<
+ " if ( " << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+
+ /* The first entry must not advance the buffer position. */
+ out << " goto _resume;\n";
+
+ /* In this reentry, to-state actions have already been executed on the
+ * partition-switch exit from the last partition. */
+ out << "_reenter:\n";
+
+ if ( cgd->hasEnd ) {
+ out <<
+ " if ( ++" << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n";
+ }
+
+ out << "_resume:\n";
+
+ /* Dispatch on the partition that the current state belongs to. */
+ out <<
+ " switch ( " << PM() << "[" << CS() << "] ) {\n";
+ for ( int p = 0; p < redFsm->nParts; p++ ) {
+ out <<
+ " case " << p << ":\n"
+ " _stat = partition" << p << "( &p, &pe, fsm );\n"
+ " break;\n";
+ }
+ out <<
+ " }\n"
+ " if ( _stat )\n"
+ " goto _reenter;\n";
+
+ /* The _out label is only referenced by the hasEnd tests above. */
+ if ( cgd->hasEnd )
+ out << " _out: {}\n";
+
+ out <<
+ " }\n";
+
+ ALL_PARTITIONS();
+}
+
+/* Walk an inline list, children included, and mark the target state of
+ * every goto or call as needing a label -- but only when the target is in
+ * the same partition as the state the code runs from. */
+void SplitCodeGen::setLabelsNeeded( RedStateAp *fromState, InlineList *inlineList )
+{
+    for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+        const bool jumps = item->type == InlineItem::Goto ||
+                item->type == InlineItem::Call;
+
+        /* In split code gen, labels are not needed for jumps that cross
+         * into another partition. */
+        if ( jumps && fromState->partition == item->targState->partition )
+            item->targState->labelNeeded = true;
+
+        if ( item->children != 0 )
+            setLabelsNeeded( fromState, item->children );
+    }
+}
+
+/* Set the label flags that one transition out of fromState requires, then
+ * scan the transition's action code for gotos and calls. */
+void SplitCodeGen::setLabelsNeeded( RedStateAp *fromState, RedTransAp *trans )
+{
+    /* In the split code gen we don't need labels for transitions across
+     * partitions. */
+    if ( fromState->partition == trans->targ->partition ) {
+        trans->labelNeeded = true;
+
+        /* The target's own label is needed unless the transition has an
+         * action containing a next statement. */
+        const bool actionHasNext =
+                trans->action != 0 && trans->action->anyNextStmt();
+        if ( !actionHasNext )
+            trans->targ->labelNeeded = true;
+    }
+
+    /* Without an action there is no code to scan. */
+    if ( trans->action == 0 )
+        return;
+
+    /* Labels are needed for states that are the target of a goto or call in
+     * action code invoked on characters (ie, not from out action code). */
+    for ( ActionTable::Iter act = trans->action->key; act.lte(); act++ )
+        setLabelsNeeded( fromState, act->value->inlineList );
+}
+
+/* Set up the labelNeeded flag of every state and transition, and then the
+ * outNeeded flag of the states. */
+void SplitCodeGen::setLabelsNeeded()
+{
+    if ( !useAgainLabel() ) {
+        /* Start with no labels at all. */
+        for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+            st->labelNeeded = false;
+        for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ )
+            trans->labelNeeded = false;
+
+        if ( redFsm->errState != 0 && anyLmSwitchError() )
+            redFsm->errState->labelNeeded = true;
+
+        /* Visit every transition of every state and flag what it reaches. */
+        for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+            for ( RedTransList::Iter rng = st->outRange; rng.lte(); rng++ )
+                setLabelsNeeded( st, rng->value );
+
+            for ( RedTransList::Iter sng = st->outSingle; sng.lte(); sng++ )
+                setLabelsNeeded( st, sng->value );
+
+            if ( st->defTrans != 0 )
+                setLabelsNeeded( st, st->defTrans );
+        }
+    }
+    else {
+        /* The _again label comes with the _again switch, which uses all
+         * of the labels. */
+        for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+            st->labelNeeded = true;
+    }
+
+    if ( !cgd->hasEnd ) {
+        if ( redFsm->errState != 0 )
+            redFsm->errState->outNeeded = true;
+
+        /* A state needs an out label if a transition into it has an action
+         * containing a break. */
+        for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) {
+            if ( trans->action != 0 && trans->action->anyBreakStmt() )
+                trans->targ->outNeeded = true;
+        }
+    }
+    else {
+        /* With an end pointer, every labelled state needs an out label. */
+        for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+            st->outNeeded = st->labelNeeded;
+    }
+}
+
diff --git a/rlcodegen/splitcodegen.h b/rlcodegen/splitcodegen.h
new file mode 100644
index 0000000..03c2139
--- /dev/null
+++ b/rlcodegen/splitcodegen.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _SPLITCODEGEN_H
+#define _SPLITCODEGEN_H
+
+#include "ipgotocodegen.h"
+
+/* Code generator that splits the machine into partitions, building on
+ * IpGotoCodeGen. */
+class SplitCodeGen : public IpGotoCodeGen
+{
+public:
+ /* Set while a partition is written, once a jump to _pt_out has been
+ * emitted. */
+ bool ptOutLabelUsed;
+
+ std::ostream &PART_MAP();
+ std::ostream &EXIT_STATES( int partition );
+ std::ostream &PART_TRANS( int partition );
+ std::ostream &TRANS_GOTO( RedTransAp *trans, int level );
+ void GOTO_HEADER( RedStateAp *state, bool stateInPartition );
+ std::ostream &STATE_GOTOS( int partition );
+ std::ostream &PARTITION( int partition );
+ std::ostream &ALL_PARTITIONS();
+ void writeOutData();
+ void writeOutExec();
+ /* NOTE(review): no definition of writeOutParts appears alongside the
+ * other members in splitcodegen.cpp -- confirm it is defined or used. */
+ void writeOutParts();
+
+ void setLabelsNeeded( RedStateAp *fromState, InlineList *inlineList );
+ void setLabelsNeeded( RedStateAp *fromState, RedTransAp *trans );
+ void setLabelsNeeded();
+
+ /* The partition whose states are being written; set by STATE_GOTOS and
+ * read by TRANS_GOTO. */
+ int currentPartition;
+};
+
+/*
+ * struct CSplitCodeGen
+ *
+ * Combines the split code generator with CCodeGen.
+ */
+struct CSplitCodeGen
+ : public SplitCodeGen, public CCodeGen
+{
+};
+
+/*
+ * struct DSplitCodeGen
+ *
+ * Combines the split code generator with DCodeGen. It must derive from
+ * SplitCodeGen, the way CSplitCodeGen does: deriving from IpGotoCodeGen
+ * directly would leave out every partition-aware member of SplitCodeGen.
+ * IpGotoCodeGen remains a base class through SplitCodeGen.
+ */
+struct DSplitCodeGen
+    : public SplitCodeGen, public DCodeGen
+{
+};
+
+
+#endif /* _SPLITCODEGEN_H */
diff --git a/rlcodegen/tabcodegen.cpp b/rlcodegen/tabcodegen.cpp
new file mode 100644
index 0000000..b382256
--- /dev/null
+++ b/rlcodegen/tabcodegen.cpp
@@ -0,0 +1,996 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Eric Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlcodegen.h"
+#include "tabcodegen.h"
+#include "redfsm.h"
+#include "gendata.h"
+
+/* Determine if we should use indicies or not, by estimating the table size
+ * both ways and setting useIndicies when the indexed layout is smaller. */
+void TabCodeGen::calcIndexSize()
+{
+    int costIndexed = 0, costDirect = 0;
+
+    /* Indexed layout: one index per transition slot of each state, plus a
+     * target (and, with actions, an action) per distinct transition. */
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        int slots = st->outSingle.length() + st->outRange.length();
+        if ( st->defTrans != 0 )
+            slots += 1;
+        costIndexed += arrayTypeSize(maxIndex) * slots;
+    }
+    costIndexed += arrayTypeSize(maxState) * redFsm->transSet.length();
+    if ( anyActions() )
+        costIndexed += arrayTypeSize(maxActionLoc) * redFsm->transSet.length();
+
+    /* Direct layout: a target (and, with actions, an action) for every
+     * transition slot of each state. */
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        int slots = st->outSingle.length() + st->outRange.length();
+        if ( st->defTrans != 0 )
+            slots += 1;
+        costDirect += arrayTypeSize(maxState) * slots;
+        if ( anyActions() )
+            costDirect += arrayTypeSize(maxActionLoc) * slots;
+    }
+
+    /* If using indicies reduces the size, use them. */
+    useIndicies = costIndexed < costDirect;
+}
+
+/* Write the table entry for a state's to-state action: its location plus
+ * one, or zero when the state has none. */
+std::ostream &TabCodeGen::TO_STATE_ACTION( RedStateAp *state )
+{
+    const int entry = ( state->toStateAction != 0 ) ?
+            state->toStateAction->location+1 : 0;
+    out << entry;
+    return out;
+}
+
+/* Write the table entry for a state's from-state action: its location plus
+ * one, or zero when the state has none. */
+std::ostream &TabCodeGen::FROM_STATE_ACTION( RedStateAp *state )
+{
+    const int entry = ( state->fromStateAction != 0 ) ?
+            state->fromStateAction->location+1 : 0;
+    out << entry;
+    return out;
+}
+
+/* Write the table entry for a state's eof action: its location plus one,
+ * or zero when the state has none. */
+std::ostream &TabCodeGen::EOF_ACTION( RedStateAp *state )
+{
+    const int entry = ( state->eofAction != 0 ) ?
+            state->eofAction->location+1 : 0;
+    out << entry;
+    return out;
+}
+
+
+/* Write the table entry for a transition's action: its location plus one,
+ * or zero when the transition has no action. */
+std::ostream &TabCodeGen::TRANS_ACTION( RedTransAp *trans )
+{
+    const int entry = ( trans->action != 0 ) ?
+            trans->action->location+1 : 0;
+    out << entry;
+    return out;
+}
+
+/* Write a switch case for every action that is referenced as a to-state
+ * action, followed by a line directive. */
+std::ostream &TabCodeGen::TO_STATE_ACTION_SWITCH()
+{
+    for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) {
+        /* Only referenced actions get a case. */
+        if ( act->numToStateRefs <= 0 )
+            continue;
+
+        /* The case label, the action code, then the break. */
+        out << "\tcase " << act->actionId << ":\n";
+        ACTION( out, act, 0, false );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write a switch case for every action that is referenced as a from-state
+ * action, followed by a line directive. */
+std::ostream &TabCodeGen::FROM_STATE_ACTION_SWITCH()
+{
+    for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) {
+        /* Only referenced actions get a case. */
+        if ( act->numFromStateRefs <= 0 )
+            continue;
+
+        /* The case label, the action code, then the break. */
+        out << "\tcase " << act->actionId << ":\n";
+        ACTION( out, act, 0, false );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write a switch case for every action that is referenced as an eof
+ * action, followed by a line directive. These are the only cases written
+ * with the last ACTION argument set to true. */
+std::ostream &TabCodeGen::EOF_ACTION_SWITCH()
+{
+    for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) {
+        /* Only referenced actions get a case. */
+        if ( act->numEofRefs <= 0 )
+            continue;
+
+        /* The case label, the action code, then the break. */
+        out << "\tcase " << act->actionId << ":\n";
+        ACTION( out, act, 0, true );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+
+/* Write a switch case for every action that is referenced from a
+ * transition, followed by a line directive. */
+std::ostream &TabCodeGen::ACTION_SWITCH()
+{
+    for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) {
+        /* Only referenced actions get a case. */
+        if ( act->numTransRefs <= 0 )
+            continue;
+
+        /* The case label, the action code, then the break. */
+        out << "\tcase " << act->actionId << ":\n";
+        ACTION( out, act, 0, false );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write, for each state, the running total of stateCondList lengths of the
+ * states before it. Entries are comma separated, with a line break after
+ * every IALL entries. */
+std::ostream &TabCodeGen::COND_OFFSETS()
+{
+    int numSeparators = 0, condOffset = 0;
+
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* This state's offset, then move past its entries. */
+        out << condOffset;
+        condOffset += st->stateCondList.length();
+
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        numSeparators += 1;
+        if ( numSeparators % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+/* Write, for each state, the offset of its first key. A state takes up one
+ * key per single transition and two keys per range. Entries are comma
+ * separated, with a line break after every IALL entries. */
+std::ostream &TabCodeGen::KEY_OFFSETS()
+{
+    int numSeparators = 0, keyOffset = 0;
+
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* This state's offset, then move past its keys. */
+        out << keyOffset;
+        keyOffset += st->outSingle.length() + st->outRange.length()*2;
+
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        numSeparators += 1;
+        if ( numSeparators % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+
+/* Write, for each state, the offset of its first index entry. A state takes
+ * up one entry per single, one per range and one for a default transition
+ * if it has one. Entries are comma separated, with a line break after
+ * every IALL entries. */
+std::ostream &TabCodeGen::INDEX_OFFSETS()
+{
+    int numSeparators = 0, indOffset = 0;
+
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* This state's offset, then move past its entries. */
+        out << indOffset;
+        indOffset += st->outSingle.length() + st->outRange.length();
+        if ( st->defTrans != 0 )
+            indOffset += 1;
+
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        numSeparators += 1;
+        if ( numSeparators % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+/* Write the length of each state's stateCondList, comma separated, with a
+ * line break after every IALL entries. */
+std::ostream &TabCodeGen::COND_LENS()
+{
+    int numSeparators = 0;
+
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Number of condition entries of this state. */
+        out << st->stateCondList.length();
+
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        numSeparators += 1;
+        if ( numSeparators % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+
+/* Write the number of single transitions of each state, comma separated,
+ * with a line break after every IALL entries. */
+std::ostream &TabCodeGen::SINGLE_LENS()
+{
+    int numSeparators = 0;
+
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Number of singles of this state. */
+        out << st->outSingle.length();
+
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        numSeparators += 1;
+        if ( numSeparators % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+/* Write the number of range transitions of each state, comma separated,
+ * with a line break after every IALL entries. */
+std::ostream &TabCodeGen::RANGE_LENS()
+{
+    int numSeparators = 0;
+
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Number of ranges of this state. */
+        out << st->outRange.length();
+
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        numSeparators += 1;
+        if ( numSeparators % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+/* Write the to-state action entry of each state, comma separated, with a
+ * line break after every IALL entries. */
+std::ostream &TabCodeGen::TO_STATE_ACTIONS()
+{
+    int numSeparators = 0;
+
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Write the to-state action entry, zero if there is none. */
+        TO_STATE_ACTION(st);
+
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        numSeparators += 1;
+        if ( numSeparators % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+/* Write the from-state action entry of each state, comma separated, with a
+ * line break after every IALL entries. */
+std::ostream &TabCodeGen::FROM_STATE_ACTIONS()
+{
+    int numSeparators = 0;
+
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Write the from-state action entry, zero if there is none. */
+        FROM_STATE_ACTION(st);
+
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        numSeparators += 1;
+        if ( numSeparators % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+/* Write the eof action entry of each state, comma separated, with a line
+ * break after every IALL entries. */
+std::ostream &TabCodeGen::EOF_ACTIONS()
+{
+    int numSeparators = 0;
+
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Write the eof action entry, zero if there is none. */
+        EOF_ACTION(st);
+
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        numSeparators += 1;
+        if ( numSeparators % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+/* Write the low and high key of every entry in every state's
+ * stateCondList. Each key is followed by a comma, with a line break after
+ * every IALL keys, and the list is closed with a final 0. */
+std::ostream &TabCodeGen::COND_KEYS()
+{
+    int numKeys = 0;
+
+    out << '\t';
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
+            /* Low end of the range. */
+            out << KEY( sc->lowKey ) << ", ";
+            numKeys += 1;
+            if ( numKeys % IALL == 0 )
+                out << "\n\t";
+
+            /* High end of the range. */
+            out << KEY( sc->highKey ) << ", ";
+            numKeys += 1;
+            if ( numKeys % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+
+    /* Every key was written with a trailing comma; the extra zero spares us
+     * from having to detect the last one. */
+    out << 0 << "\n";
+    return out;
+}
+
+/* Write the condition space id of every entry in every state's
+ * stateCondList. Each id is followed by a comma, with a line break after
+ * every IALL ids, and the list is closed with a final 0. */
+std::ostream &TabCodeGen::COND_SPACES()
+{
+    int numIds = 0;
+
+    out << '\t';
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
+            out << sc->condSpace->condSpaceId << ", ";
+            numIds += 1;
+            if ( numIds % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+
+    /* Every id was written with a trailing comma; the extra zero spares us
+     * from having to detect the last one. */
+    out << 0 << "\n";
+    return out;
+}
+
+/* Write the keys of every state: the key of each single transition first,
+ * then the low and high key of each range. Each key is followed by a comma,
+ * with a line break after every IALL keys, and the list is closed with a
+ * final 0. */
+std::ostream &TabCodeGen::KEYS()
+{
+    int numKeys = 0;
+
+    out << '\t';
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Singles have one key each. */
+        for ( RedTransList::Iter single = st->outSingle; single.lte(); single++ ) {
+            out << KEY( single->lowKey ) << ", ";
+            numKeys += 1;
+            if ( numKeys % IALL == 0 )
+                out << "\n\t";
+        }
+
+        /* Ranges have a low and a high key. */
+        for ( RedTransList::Iter range = st->outRange; range.lte(); range++ ) {
+            out << KEY( range->lowKey ) << ", ";
+            numKeys += 1;
+            if ( numKeys % IALL == 0 )
+                out << "\n\t";
+
+            out << KEY( range->highKey ) << ", ";
+            numKeys += 1;
+            if ( numKeys % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+
+    /* Every key was written with a trailing comma; the extra zero spares us
+     * from having to detect the last one. */
+    out << 0 << "\n";
+    return out;
+}
+
+/* Write the transition id for every transition slot of every state: the
+ * singles, then the ranges, then the default transition if there is one.
+ * Each id is followed by a comma, with a line break after every IALL ids,
+ * and the list is closed with a final 0. */
+std::ostream &TabCodeGen::INDICIES()
+{
+    int numIds = 0;
+
+    out << '\t';
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* The singles. */
+        for ( RedTransList::Iter single = st->outSingle; single.lte(); single++ ) {
+            out << single->value->id << ", ";
+            numIds += 1;
+            if ( numIds % IALL == 0 )
+                out << "\n\t";
+        }
+
+        /* The ranges. */
+        for ( RedTransList::Iter range = st->outRange; range.lte(); range++ ) {
+            out << range->value->id << ", ";
+            numIds += 1;
+            if ( numIds % IALL == 0 )
+                out << "\n\t";
+        }
+
+        /* The default transition comes last. */
+        if ( st->defTrans != 0 ) {
+            out << st->defTrans->id << ", ";
+            numIds += 1;
+            if ( numIds % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+
+    /* Every id was written with a trailing comma; the extra zero spares us
+     * from having to detect the last one. */
+    out << 0 << "\n";
+    return out;
+}
+
+/* Write the target state id for every transition slot of every state: the
+ * singles, then the ranges, then the default transition if there is one.
+ * Each id is followed by a comma, with a line break after every IALL ids,
+ * and the list is closed with a final 0. */
+std::ostream &TabCodeGen::TRANS_TARGS()
+{
+    int numTargs = 0;
+
+    out << '\t';
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* The singles. */
+        for ( RedTransList::Iter single = st->outSingle; single.lte(); single++ ) {
+            out << single->value->targ->id << ", ";
+            numTargs += 1;
+            if ( numTargs % IALL == 0 )
+                out << "\n\t";
+        }
+
+        /* The ranges. */
+        for ( RedTransList::Iter range = st->outRange; range.lte(); range++ ) {
+            out << range->value->targ->id << ", ";
+            numTargs += 1;
+            if ( numTargs % IALL == 0 )
+                out << "\n\t";
+        }
+
+        /* The default transition's target comes last. */
+        if ( st->defTrans != 0 ) {
+            out << st->defTrans->targ->id << ", ";
+            numTargs += 1;
+            if ( numTargs % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+
+    /* Every id was written with a trailing comma; the extra zero spares us
+     * from having to detect the last one. */
+    out << 0 << "\n";
+    return out;
+}
+
+
+/* Write the action entry for every transition slot of every state: the
+ * singles, then the ranges, then the default transition if there is one.
+ * Each entry is followed by a comma, with a line break after every IALL
+ * entries, and the list is closed with a final 0. */
+std::ostream &TabCodeGen::TRANS_ACTIONS()
+{
+    int numActs = 0;
+
+    out << '\t';
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* The singles. */
+        for ( RedTransList::Iter single = st->outSingle; single.lte(); single++ ) {
+            TRANS_ACTION( single->value ) << ", ";
+            numActs += 1;
+            if ( numActs % IALL == 0 )
+                out << "\n\t";
+        }
+
+        /* The ranges. */
+        for ( RedTransList::Iter range = st->outRange; range.lte(); range++ ) {
+            TRANS_ACTION( range->value ) << ", ";
+            numActs += 1;
+            if ( numActs % IALL == 0 )
+                out << "\n\t";
+        }
+
+        /* The default transition's action comes last. */
+        if ( st->defTrans != 0 ) {
+            TRANS_ACTION( st->defTrans ) << ", ";
+            numActs += 1;
+            if ( numActs % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+
+    /* Every entry was written with a trailing comma; the extra zero spares
+     * us from having to detect the last one. */
+    out << 0 << "\n";
+    return out;
+}
+
+/* Write the target state id of every transition in redFsm->transSet,
+ * ordered by transition id, as a comma separated list with a line break
+ * after every IALL entries. No trailing comma or terminator is written. */
+std::ostream &TabCodeGen::TRANS_TARGS_WI()
+{
+    /* Index the transitions by id so they can be written in id order. */
+    RedTransAp **byId = new RedTransAp*[redFsm->transSet.length()];
+    for ( TransApSet::Iter item = redFsm->transSet; item.lte(); item++ )
+        byId[item->id] = item;
+
+    out << '\t';
+    for ( int pos = 0; pos < redFsm->transSet.length(); pos++ ) {
+        /* The separator goes in front of every entry except the first;
+         * pos equals the number of entries already written. */
+        if ( pos > 0 ) {
+            out << ", ";
+            if ( pos % IALL == 0 )
+                out << "\n\t";
+        }
+
+        out << byId[pos]->targ->id;
+    }
+    out << "\n";
+    delete[] byId;
+    return out;
+}
+
+
+/* Write the action entry (as produced by TRANS_ACTION()) of every
+ * transition in redFsm->transSet, ordered by transition id, as a comma
+ * separated list with a line break after every IALL entries. No trailing
+ * comma or terminator is written. */
+std::ostream &TabCodeGen::TRANS_ACTIONS_WI()
+{
+    /* Index the transitions by id so they can be written in id order. */
+    RedTransAp **byId = new RedTransAp*[redFsm->transSet.length()];
+    for ( TransApSet::Iter item = redFsm->transSet; item.lte(); item++ )
+        byId[item->id] = item;
+
+    out << '\t';
+    for ( int pos = 0; pos < redFsm->transSet.length(); pos++ ) {
+        /* The separator goes in front of every entry except the first;
+         * pos equals the number of entries already written. */
+        if ( pos > 0 ) {
+            out << ", ";
+            if ( pos % IALL == 0 )
+                out << "\n\t";
+        }
+
+        TRANS_ACTION( byId[pos] );
+    }
+    out << "\n";
+    delete[] byId;
+    return out;
+}
+
+void TabCodeGen::LOCATE_TRANS()
+{ /* Emit the code that locates the transition for the current key.
+ * The emitted code points _keys at the current state's keys and _trans at
+ * its index offset, then runs two binary searches: first over the single
+ * keys, then over the range keys. A hit adjusts _trans and jumps to the
+ * _match label. After two misses _trans has been advanced past both
+ * groups and execution falls out of the emitted block. */
+ out <<
+ " _keys = " << ARR_OFF( K(), KO() + "[" + CS() + "]" ) << ";\n"
+ " _trans = " << IO() << "[" << CS() << "];\n"
+ "\n" /* Search the single keys: _klen entries, one key each. */
+ " _klen = " << SL() << "[" << CS() << "];\n"
+ " if ( _klen > 0 ) {\n"
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_lower = _keys;\n"
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_mid;\n"
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_upper = _keys + _klen - 1;\n"
+ " while (1) {\n"
+ " if ( _upper < _lower )\n"
+ " break;\n"
+ "\n"
+ " _mid = _lower + ((_upper-_lower) >> 1);\n"
+ " if ( " << GET_WIDE_KEY() << " < *_mid )\n"
+ " _upper = _mid - 1;\n"
+ " else if ( " << GET_WIDE_KEY() << " > *_mid )\n"
+ " _lower = _mid + 1;\n"
+ " else {\n"
+ " _trans += (_mid - _keys);\n"
+ " goto _match;\n"
+ " }\n"
+ " }\n"
+ " _keys += _klen;\n" /* No single key matched: skip past the singles. */
+ " _trans += _klen;\n"
+ " }\n"
+ "\n" /* Search the ranges: each entry is a pair, _mid[0] and _mid[1], so
+ * pointers move in steps of two and the index is halved. */
+ " _klen = " << RL() << "[" << CS() << "];\n"
+ " if ( _klen > 0 ) {\n"
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_lower = _keys;\n"
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_mid;\n"
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_upper = _keys + (_klen<<1) - 2;\n"
+ " while (1) {\n"
+ " if ( _upper < _lower )\n"
+ " break;\n"
+ "\n"
+ " _mid = _lower + (((_upper-_lower) >> 1) & ~1);\n"
+ " if ( " << GET_WIDE_KEY() << " < _mid[0] )\n"
+ " _upper = _mid - 2;\n"
+ " else if ( " << GET_WIDE_KEY() << " > _mid[1] )\n"
+ " _lower = _mid + 2;\n"
+ " else {\n"
+ " _trans += ((_mid - _keys)>>1);\n"
+ " goto _match;\n"
+ " }\n"
+ " }\n"
+ " _trans += _klen;\n"
+ " }\n"
+ "\n";
+}
+
+/* Write a jump to a fixed state into ret: a braced statement that assigns
+ * gotoDest to the current state expression and then goes to the _again
+ * label. inFinish is not used by this implementation. */
+void TabCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish )
+{
+    ret << "{";
+    ret << CS() << " = " << gotoDest << "; ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write a jump to a computed state into ret: the current state expression
+ * is assigned the parenthesised expansion of ilItem's children, followed by
+ * a goto to the _again label. */
+void TabCodeGen::GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+    ret << "{";
+    ret << CS() << " = (";
+    INLINE_LIST( ret, ilItem->children, 0, inFinish );
+    ret << "); ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+void TabCodeGen::CURS( ostream &ret, bool inFinish )
+{ /* Writes a parenthesised reference to the _ps variable into ret.
+ * inFinish is not used by this implementation. */
+ ret << "(_ps)";
+}
+
+void TabCodeGen::TARGS( ostream &ret, bool inFinish, int targState )
+{ /* Writes the parenthesised current state expression, CS(), into ret.
+ * Neither inFinish nor targState is used by this implementation. */
+ ret << "(" << CS() << ")";
+}
+
+/* Write an assignment of nextDest to the current state expression into
+ * ret. No jump is written. inFinish is not used by this implementation. */
+void TabCodeGen::NEXT( ostream &ret, int nextDest, bool inFinish )
+{
+    ret << CS();
+    ret << " = " << nextDest << ";";
+}
+
+/* Write an assignment to the current state expression into ret; the
+ * assigned value is the parenthesised expansion of ilItem's children. No
+ * jump is written. */
+void TabCodeGen::NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+    ret << CS();
+    ret << " = (";
+    INLINE_LIST( ret, ilItem->children, 0, inFinish );
+    ret << ");";
+}
+
+/* Write a call to a fixed state into ret: the current state is stored at
+ * STACK()[TOP()++], callDest is assigned to the current state expression,
+ * and control goes to the _again label. targState and inFinish are not
+ * used by this implementation. */
+void TabCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish )
+{
+    /* Save the current state on the stack. */
+    ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; ";
+
+    /* Enter the call destination. */
+    ret << CS() << " = " << callDest << "; ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write a call to a computed state into ret: the current state is stored
+ * at STACK()[TOP()++], the current state expression is assigned the
+ * parenthesised expansion of ilItem's children, and control goes to the
+ * _again label. */
+void TabCodeGen::CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish )
+{
+    /* Save the current state on the stack. */
+    ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; ";
+
+    /* Enter the computed call destination. */
+    ret << CS() << " = (";
+    INLINE_LIST( ret, ilItem->children, targState, inFinish );
+    ret << "); ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write a return into ret: the current state expression is assigned
+ * STACK()[--TOP()], then control goes to the _again label. inFinish is not
+ * used by this implementation. */
+void TabCodeGen::RET( ostream &ret, bool inFinish )
+{
+    ret << "{";
+    ret << CS() << " = " << STACK() << "[--" << TOP() << "]; ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write a goto to the _out label into ret and record, via outLabelUsed,
+ * that the label is referenced. targState is not used by this
+ * implementation. */
+void TabCodeGen::BREAK( ostream &ret, int targState )
+{
+    outLabelUsed = true;
+    ret << CTRL_FLOW();
+    ret << "goto _out;";
+}
+
+/* Write the static data of the table-driven machine. The order is: the
+ * actions array, the condition arrays, key offsets, keys, single lengths,
+ * range lengths, index offsets, the optional indices array, transition
+ * targets, transition actions, to-state, from-state and EOF action arrays,
+ * and finally the start, first-final and error state constants. Arrays
+ * that are guarded by an any*() query are left out when the query is
+ * false. */
+void TabCodeGen::writeOutData()
+{
+    /* The actions array is only written when the machine has actions; an
+     * empty array would never be used. */
+    if ( anyActions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(maxActArrItem), A() );
+        ACTIONS_ARRAY();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    /* Condition offsets, lengths, keys and spaces. */
+    if ( anyConditions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(maxCondOffset), CO() );
+        COND_OFFSETS();
+        CLOSE_ARRAY() << "\n";
+
+        OPEN_ARRAY( ARRAY_TYPE(maxCondLen), CL() );
+        COND_LENS();
+        CLOSE_ARRAY() << "\n";
+
+        OPEN_ARRAY( WIDE_ALPH_TYPE(), CK() );
+        COND_KEYS();
+        CLOSE_ARRAY() << "\n";
+
+        OPEN_ARRAY( ARRAY_TYPE(maxCondSpaceId), C() );
+        COND_SPACES();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    /* The key and index tables are always written. */
+    OPEN_ARRAY( ARRAY_TYPE(maxKeyOffset), KO() );
+    KEY_OFFSETS();
+    CLOSE_ARRAY() << "\n";
+
+    OPEN_ARRAY( WIDE_ALPH_TYPE(), K() );
+    KEYS();
+    CLOSE_ARRAY() << "\n";
+
+    OPEN_ARRAY( ARRAY_TYPE(maxSingleLen), SL() );
+    SINGLE_LENS();
+    CLOSE_ARRAY() << "\n";
+
+    OPEN_ARRAY( ARRAY_TYPE(maxRangeLen), RL() );
+    RANGE_LENS();
+    CLOSE_ARRAY() << "\n";
+
+    OPEN_ARRAY( ARRAY_TYPE(maxIndexOffset), IO() );
+    INDEX_OFFSETS();
+    CLOSE_ARRAY() << "\n";
+
+    /* The indices array exists only in the with-indices layout. */
+    if ( useIndicies ) {
+        OPEN_ARRAY( ARRAY_TYPE(maxIndex), I() );
+        INDICIES();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    /* Transition targets: ordered by transition id in the with-indices
+     * layout, otherwise in per-state order. */
+    OPEN_ARRAY( ARRAY_TYPE(maxState), TT() );
+    if ( useIndicies )
+        TRANS_TARGS_WI();
+    else
+        TRANS_TARGS();
+    CLOSE_ARRAY() << "\n";
+
+    /* Transition actions, in the same order as the targets. */
+    if ( anyActions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), TA() );
+        if ( useIndicies )
+            TRANS_ACTIONS_WI();
+        else
+            TRANS_ACTIONS();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    if ( anyToStateActions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), TSA() );
+        TO_STATE_ACTIONS();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    if ( anyFromStateActions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), FSA() );
+        FROM_STATE_ACTIONS();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    if ( anyEofActions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(maxActionLoc), EA() );
+        EOF_ACTIONS();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    /* The start state constant is always written; the first-final and
+     * error state constants only when requested through cgd. */
+    STATIC_VAR( "int", START() ) << " = " << START_STATE_ID() << ";\n"
+        "\n";
+
+    if ( cgd->writeFirstFinal ) {
+        STATIC_VAR( "int" , FIRST_FINAL() ) << " = " << FIRST_FINAL_STATE() << ";\n"
+            "\n";
+    }
+
+    if ( cgd->writeErr ) {
+        STATIC_VAR( "int", ERROR() ) << " = " << ERROR_STATE() << ";\n"
+            "\n";
+    }
+}
+
+/* Emit the code that translates the current key into _widec. The emitted
+ * code starts with _widec set to GET_KEY(), then binary-searches the
+ * current state's condition key pairs (_mid[0] and _mid[1]). On a hit it
+ * switches on the condition space found for that pair: _widec is rebased
+ * onto the space's base key and, for each condition of the space that
+ * evaluates true, an offset of (1 << position) * alphabet size is added. */
+void TabCodeGen::COND_TRANSLATE()
+{
+    out <<
+        " _widec = " << GET_KEY() << ";\n"
+        " _klen = " << CL() << "[" << CS() << "];\n"
+        " _keys = " << ARR_OFF( CK(), "(" + CO() + "[" + CS() + "]*2)" ) << ";\n"
+        " if ( _klen > 0 ) {\n"
+        " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_lower = _keys;\n"
+        " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_mid;\n"
+        " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_upper = _keys + (_klen<<1) - 2;\n"
+        " while (1) {\n"
+        " if ( _upper < _lower )\n"
+        " break;\n"
+        "\n"
+        " _mid = _lower + (((_upper-_lower) >> 1) & ~1);\n"
+        " if ( " << GET_WIDE_KEY() << " < _mid[0] )\n"
+        " _upper = _mid - 2;\n"
+        " else if ( " << GET_WIDE_KEY() << " > _mid[1] )\n"
+        " _lower = _mid + 2;\n"
+        " else {\n"
+        " switch ( " << C() << "[" << CO() << "[" << CS() << "]"
+        " + ((_mid - _keys)>>1)] ) {\n";
+
+    /* One case per condition space. */
+    for ( CondSpaceList::Iter csi = cgd->condSpaceList; csi.lte(); csi++ ) {
+        CondSpace *condSpace = csi;
+        out << " case " << condSpace->condSpaceId << ": {\n";
+        out << TABS(2) << "_widec = " << CAST(WIDE_ALPH_TYPE()) << "(" <<
+                KEY(condSpace->baseKey) << " + (" << GET_KEY() <<
+                " - " << KEY(keyOps->minKey) << "));\n";
+
+        /* The iterator over the conditions has its own name so that it no
+         * longer shadows the condition space iterator of the outer loop. */
+        for ( CondSet::Iter cond = condSpace->condSet; cond.lte(); cond++ ) {
+            out << TABS(2) << "if ( ";
+            CONDITION( out, *cond );
+            Size condValOffset = ((1 << cond.pos()) * keyOps->alphSize());
+            out << " ) _widec += " << condValOffset << ";\n";
+        }
+
+        out <<
+            " break;\n"
+            " }\n";
+    }
+
+    SWITCH_DEFAULT();
+
+    out <<
+        " }\n"
+        " break;\n"
+        " }\n"
+        " }\n"
+        " }\n"
+        "\n";
+}
+
+void TabCodeGen::writeOutExec()
+{ /* Emit the execution block of the table-driven machine. The emitted
+ * code declares its locals, then loops between the _resume, _match and
+ * _again labels until it leaves through _out. Optional parts are emitted
+ * only when the corresponding any*() query or cgd flag asks for them. */
+ outLabelUsed = false;
+ /* Open the block and declare _klen. */
+ out <<
+ " {\n"
+ " int _klen";
+ /* _ps is declared only when anyRegCurStateRef() is true. */
+ if ( anyRegCurStateRef() )
+ out << ", _ps";
+
+ out <<
+ ";\n"
+ " " << UINT() << " _trans;\n";
+
+ if ( anyConditions() )
+ out << " " << WIDE_ALPH_TYPE() << " _widec;\n";
+ /* _acts and _nacts are shared by all the action loops emitted below. */
+ if ( anyToStateActions() || anyRegActions() || anyFromStateActions() ) {
+ out <<
+ " " << PTR_CONST() << ARRAY_TYPE(maxActArrItem) << POINTER() << "_acts;\n"
+ " " << UINT() << " _nacts;\n";
+ }
+
+ out <<
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_keys;\n"
+ "\n";
+ /* With cgd->hasEnd set, the emitted code goes to _out at once when P() equals PE(). */
+ if ( cgd->hasEnd ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+
+ out << "_resume:\n";
+ /* When the machine has an error state, the emitted code goes to _out once the current state equals it. */
+ if ( redFsm->errState != 0 ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << CS() << " == " << redFsm->errState->id << " )\n"
+ " goto _out;\n";
+ }
+ /* From-state actions of the current state. */
+ if ( anyFromStateActions() ) {
+ out <<
+ " _acts = " << ARR_OFF( A(), FSA() + "[" + CS() + "]" ) << ";\n"
+ " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+ " while ( _nacts-- > 0 ) {\n"
+ " switch ( *_acts++ ) {\n";
+ FROM_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+
+ if ( anyConditions() )
+ COND_TRANSLATE();
+
+ LOCATE_TRANS();
+
+ out << "_match:\n";
+ /* _ps receives the current state before it is replaced by the transition target. */
+ if ( anyRegCurStateRef() )
+ out << " _ps = " << CS() << ";\n";
+ /* In the with-indices layout _trans is first mapped through the I() array. */
+ if ( useIndicies )
+ out << " _trans = " << I() << "[_trans];\n";
+
+ out <<
+ " " << CS() << " = " << TT() << "[_trans];\n"
+ "\n";
+ /* Transition actions; an entry of 0 in TA() makes the emitted code skip straight to _again. */
+ if ( anyRegActions() ) {
+ out <<
+ " if ( " << TA() << "[_trans] == 0 )\n"
+ " goto _again;\n"
+ "\n"
+ " _acts = " << ARR_OFF( A(), TA() + "[_trans]" ) << ";\n"
+ " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+ " while ( _nacts-- > 0 )\n {\n"
+ " switch ( *_acts++ )\n {\n";
+ ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+ /* The _again label is written only when one of these queries is true. */
+ if ( anyRegActions() || anyActionGotos() || anyActionCalls() || anyActionRets() )
+ out << "_again:\n";
+ /* To-state actions of the state just entered. */
+ if ( anyToStateActions() ) {
+ out <<
+ " _acts = " << ARR_OFF( A(), TSA() + "[" + CS() + "]" ) << ";\n"
+ " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+ " while ( _nacts-- > 0 ) {\n"
+ " switch ( *_acts++ ) {\n";
+ TO_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+ /* Advance P(); with cgd->hasEnd the loop only resumes while P() differs from PE(). */
+ if ( cgd->hasEnd ) {
+ out <<
+ " if ( ++" << P() << " != " << PE() << " )\n"
+ " goto _resume;\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n"
+ " goto _resume;\n";
+ }
+ /* The _out label is written only if something emitted above refers to it. */
+ if ( outLabelUsed )
+ out << " _out: {}\n";
+
+ out << " }\n";
+}
+
+
+/* Emit the block that runs the EOF actions of the current state: it looks
+ * the state up in the EA() array, reads the action count, and dispatches
+ * each action through the switch written by EOF_ACTION_SWITCH(). Nothing is
+ * written when anyEofActions() is false. */
+void TabCodeGen::writeOutEOF()
+{
+    if ( !anyEofActions() )
+        return;
+
+    out <<
+        " {\n"
+        " " << PTR_CONST() << ARRAY_TYPE(maxActArrItem) << POINTER() << "_acts = " <<
+        ARR_OFF( A(), EA() + "[" + CS() + "]" ) << ";\n"
+        " " << UINT() << " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+        " while ( _nacts-- > 0 ) {\n"
+        " switch ( *_acts++ ) {\n";
+    EOF_ACTION_SWITCH();
+    SWITCH_DEFAULT() <<
+        " }\n"
+        " }\n"
+        " }\n"
+        "\n";
+}
diff --git a/rlcodegen/tabcodegen.h b/rlcodegen/tabcodegen.h
new file mode 100644
index 0000000..34af924
--- /dev/null
+++ b/rlcodegen/tabcodegen.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Eric Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _TABCODEGEN_H
+#define _TABCODEGEN_H
+
+#include <iostream>
+#include "fsmcodegen.h"
+
+/* Forwards. */
+struct CodeGenData;
+struct NameInst;
+struct RedTransAp;
+struct RedStateAp;
+
+/*
+ * TabCodeGen
+ */
+class TabCodeGen : virtual public FsmCodeGen
+{
+    /* Code generator for the table-driven output style, layered on
+     * FsmCodeGen. All stream parameters are spelled std::ostream so that
+     * this header does not depend on a using-declaration made elsewhere. */
+public:
+    virtual ~TabCodeGen() { }
+
+    /* Write the static data tables and the execution block. */
+    virtual void writeOutData();
+    virtual void writeOutExec();
+
+protected:
+    /* Writers for the action switch bodies. */
+    std::ostream &TO_STATE_ACTION_SWITCH();
+    std::ostream &FROM_STATE_ACTION_SWITCH();
+    std::ostream &EOF_ACTION_SWITCH();
+    std::ostream &ACTION_SWITCH();
+
+    /* Writers for the contents of the individual data arrays. */
+    std::ostream &COND_KEYS();
+    std::ostream &COND_SPACES();
+    std::ostream &KEYS();
+    std::ostream &INDICIES();
+    std::ostream &COND_OFFSETS();
+    std::ostream &KEY_OFFSETS();
+    std::ostream &INDEX_OFFSETS();
+    std::ostream &COND_LENS();
+    std::ostream &SINGLE_LENS();
+    std::ostream &RANGE_LENS();
+    std::ostream &TO_STATE_ACTIONS();
+    std::ostream &FROM_STATE_ACTIONS();
+    std::ostream &EOF_ACTIONS();
+    std::ostream &TRANS_TARGS();
+    std::ostream &TRANS_ACTIONS();
+    std::ostream &TRANS_TARGS_WI();
+    std::ostream &TRANS_ACTIONS_WI();
+
+    /* Emit the key search that locates the transition to take. */
+    void LOCATE_TRANS();
+
+    /* Emit the translation of the input key into the wide key. */
+    void COND_TRANSLATE();
+
+    /* Writers for the control-flow statements used inside actions. */
+    void GOTO( std::ostream &ret, int gotoDest, bool inFinish );
+    void CALL( std::ostream &ret, int callDest, int targState, bool inFinish );
+    void NEXT( std::ostream &ret, int nextDest, bool inFinish );
+    void GOTO_EXPR( std::ostream &ret, InlineItem *ilItem, bool inFinish );
+    void NEXT_EXPR( std::ostream &ret, InlineItem *ilItem, bool inFinish );
+    void CALL_EXPR( std::ostream &ret, InlineItem *ilItem, int targState, bool inFinish );
+    void CURS( std::ostream &ret, bool inFinish );
+    void TARGS( std::ostream &ret, bool inFinish, int targState );
+    void RET( std::ostream &ret, bool inFinish );
+    void BREAK( std::ostream &ret, int targState );
+
+    /* Hooks that derived generators may override. */
+    virtual std::ostream &TO_STATE_ACTION( RedStateAp *state );
+    virtual std::ostream &FROM_STATE_ACTION( RedStateAp *state );
+    virtual std::ostream &EOF_ACTION( RedStateAp *state );
+    virtual std::ostream &TRANS_ACTION( RedTransAp *trans );
+    virtual void calcIndexSize();
+    virtual void writeOutEOF();
+};
+
+
+/*
+ * CTabCodeGen
+ */
+struct CTabCodeGen
+ : public TabCodeGen, public CCodeGen
+{ /* Combines TabCodeGen with CCodeGen; adds no members of its own. */
+};
+
+/*
+ * DTabCodeGen
+ */
+struct DTabCodeGen
+ : public TabCodeGen, public DCodeGen
+{ /* Combines TabCodeGen with DCodeGen; adds no members of its own. */
+};
+
+
+#endif /* _TABCODEGEN_H */
diff --git a/rlcodegen/xmlparse.kh b/rlcodegen/xmlparse.kh
new file mode 100644
index 0000000..3bed642
--- /dev/null
+++ b/rlcodegen/xmlparse.kh
@@ -0,0 +1,119 @@
+/*
+ * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef _XMLPARSE_H
+#define _XMLPARSE_H
+
+#include "vector.h"
+#include "rlcodegen.h"
+#include "gendata.h"
+#include <iostream>
+
+using std::ostream;
+
+struct XMLTagHashPair
+{ /* A name/id pair. NOTE(review): presumably maps an XML tag name to its
+ * TAG_* token id for a lookup table defined elsewhere -- confirm. */
+ char *name;
+ int id;
+};
+
+struct Token
+{ /* Value passed to Parser::token( int, Token & ): the XML tag that was
+ * read and an InputLoc the grammar actions hand to xml_error(). */
+ XMLTag *tag;
+ InputLoc loc;
+};
+
+struct InlineItem;
+struct InlineList;
+
+struct LmSwitchVect;
+struct LmSwitchAction;
+
+//#include "xmlpdefs.h"
+
+/* These come from the scanner and point back into the parser. We will borrow
+ * them for error reporting. */
+//extern YYSTYPE *yylval;
+//extern YYLTYPE *yylloc;
+
+//int yylex( YYSTYPE *, YYLTYPE *);
+void scannerInit();
+
+extern char *lelNames[];
+
+struct LangEl;
+
+/* Parser for the XML intermediate format. The %%{ }%% section is a kelbt
+ * section that names the parser, declares its tokens and requests the
+ * parser interface; the remaining members are ordinary C++. */
+struct Parser
+{
+    %%{
+        parser Parser;
+
+        token TAG_unknown, TAG_ragel, TAG_ragel_def, TAG_host, TAG_state_list,
+            TAG_state, TAG_trans_list, TAG_t, TAG_machine, TAG_start_state,
+            TAG_action_list, TAG_action_table_list, TAG_action,
+            TAG_action_table, TAG_alphtype, TAG_element, TAG_getkey,
+            TAG_state_actions, TAG_entry_points, TAG_sub_action,
+            TAG_cond_space_list, TAG_cond_space, TAG_cond_list, TAG_c;
+
+        # Inline block tokens.
+        token TAG_text, TAG_goto, TAG_call, TAG_next, TAG_goto_expr,
+            TAG_call_expr, TAG_next_expr, TAG_ret, TAG_pchar, TAG_char,
+            TAG_hold, TAG_exec, TAG_holdte, TAG_execte, TAG_curs, TAG_targs,
+            TAG_entry, TAG_data, TAG_lm_switch, TAG_init_act, TAG_set_act,
+            TAG_set_tokend, TAG_get_tokend, TAG_init_tokstart,
+            TAG_set_tokstart, TAG_write, TAG_curstate, TAG_access, TAG_break,
+            TAG_option;
+
+        interface;
+    }%%
+
+    /* Every member collected during parsing starts out as zero. The
+     * grammar actions reset the cur* counters only when the matching
+     * length attribute is present, so without this a malformed document
+     * would make them read indeterminate values. */
+    Parser( char *fileName )
+        : fileName(fileName), sourceFileName(0),
+        attrKey(0), attrValue(0),
+        curAction(0), curActionTable(0), curTrans(0),
+        curState(0), curCondSpace(0), curStateCond(0)
+    {
+        //pd = new ParseData( fileName, sectionName, sectionLoc );
+    }
+
+    /* Token entry points. */
+    int token( int id );
+    int token( int tokenId, Token &token );
+    int token( XMLTag *tag, int col, int line );
+
+    /* Report an error encountered by the parser. */
+    ostream &error();
+    ostream &error( const InputLoc &loc );
+    ostream &parser_error( int tokId, Token &token );
+
+    /* The file name given to the constructor. */
+    char *fileName;
+
+    /* Collected during parsing. */
+    char *sourceFileName;
+    char *attrKey;
+    char *attrValue;
+    int curAction;
+    int curActionTable;
+    int curTrans;
+    int curState;
+    int curCondSpace;
+    int curStateCond;
+
+    CodeGenMap codeGenMap;
+};
+
+#endif /* _XMLPARSE_H */
diff --git a/rlcodegen/xmlparse.kl b/rlcodegen/xmlparse.kl
new file mode 100644
index 0000000..7e2dbf0
--- /dev/null
+++ b/rlcodegen/xmlparse.kl
@@ -0,0 +1,875 @@
+/*
+ * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "xmlparse.h"
+#include "rlcodegen.h"
+#include "common.h"
+#include "gendata.h"
+#include <iostream>
+
+using std::cout;
+using std::ostream;
+using std::istream;
+using std::cerr;
+using std::endl;
+
+Key readKey( char *td, char **end );
+long readOffsetPtr( char *td, char **end );
+unsigned long readLength( char *td );
+
+%%{
+
+parser Parser;
+
+include "xmlparse.kh";
+
+start: tag_ragel;
+start:
+ final {
+ /* If we get no input the assumption is that the frontend died and
+ * emitted an error. Count it as an error here as well; ragel_def only
+ * generates code while gblErrorCount is zero. */
+ gblErrorCount += 1;
+ };
+
+tag_ragel: tag_ragel_head host_or_def_list '/' TAG_ragel;
+
+tag_ragel_head: TAG_ragel
+    final {
+        /* The source file name is mandatory: it is needed to open the
+         * output below, so its absence is fatal. */
+        Attribute *fileNameAttr = $1->tag->findAttr( "filename" );
+        if ( fileNameAttr == 0 ) {
+            xml_error($1->loc) << "tag <ragel> requires a filename attribute" << endl;
+            exit(1);
+        }
+        else
+            sourceFileName = fileNameAttr->value;
+
+        /* Select the host language. A missing attribute and a value that
+         * is not one of the known languages are both reported as errors. */
+        Attribute *langAttr = $1->tag->findAttr( "lang" );
+        if ( langAttr == 0 )
+            xml_error($1->loc) << "tag <ragel> requires a lang attribute" << endl;
+        else {
+            if ( strcmp( langAttr->value, "C" ) == 0 ) {
+                hostLangType = CCode;
+                hostLang = &hostLangC;
+            }
+            else if ( strcmp( langAttr->value, "D" ) == 0 ) {
+                hostLangType = DCode;
+                hostLang = &hostLangD;
+            }
+            else if ( strcmp( langAttr->value, "Java" ) == 0 ) {
+                hostLangType = JavaCode;
+                hostLang = &hostLangJava;
+            }
+            else {
+                xml_error($1->loc) << "tag <ragel> lang attribute must be "
+                        "one of C, D or Java" << endl;
+            }
+        }
+
+        /* Eventually more types will be supported. */
+        if ( hostLangType == JavaCode && codeStyle != GenTables ) {
+            error() << "java: only the table code style -T0 is "
+                    "currently supported" << endl;
+        }
+
+        openOutput( sourceFileName );
+    };
+
+host_or_def_list: host_or_def_list host_or_def;
+host_or_def_list: ;
+
+host_or_def: host;
+host_or_def: ragel_def;
+
+host:
+ TAG_host '/' TAG_host
+ final { /* A block of host code. When the output format is OutCode a
+ * line directive for the "line" attribute is written, followed by the
+ * tag content. The content is written even if the attribute is
+ * missing. */
+ Attribute *lineAttr = $1->tag->findAttr( "line" );
+ if ( lineAttr == 0 )
+ xml_error($1->loc) << "tag <host> requires a line attribute" << endl;
+ else {
+ int line = atoi( lineAttr->value );
+ if ( outputFormat == OutCode )
+ lineDirective( *outStream, sourceFileName, line );
+ }
+
+ if ( outputFormat == OutCode )
+ *outStream << $3->tag->content;
+ };
+
+ragel_def:
+ tag_ragel_def_head ragel_def_item_list '/' TAG_ragel_def
+ final { /* The definition is complete: generate only if no error has
+ * been counted so far. */
+ if ( gblErrorCount == 0 )
+ cgd->generate();
+ };
+
+tag_ragel_def_head: TAG_ragel_def
+ final { /* Select the CodeGenData (cgd) for this definition and reset
+ * its write flags. */
+ bool wantComplete = outputFormat != OutGraphvizDot;
+ /* A named definition reuses the CodeGenData already stored under that name in codeGenMap, or creates and stores one. */
+ char *fsmName = 0;
+ Attribute *nameAttr = $1->tag->findAttr( "name" );
+ if ( nameAttr != 0 ) {
+ fsmName = nameAttr->value;
+
+ CodeGenMapEl *mapEl = codeGenMap.find( fsmName );
+ if ( mapEl != 0 )
+ cgd = mapEl->value;
+ else {
+ cgd = new CodeGenData( sourceFileName, fsmName, wantComplete );
+ codeGenMap.insert( fsmName, cgd );
+ }
+ }
+ else { /* Unnamed: a fresh CodeGenData that is not entered into the map. */
+ cgd = new CodeGenData( sourceFileName, fsmName, wantComplete );
+ }
+ /* Clear all write requests and make this definition's key ops the global ones. */
+ cgd->writeOps = 0;
+ cgd->writeData = false;
+ cgd->writeInit = false;
+ cgd->writeExec = false;
+ cgd->writeEOF = false;
+ ::keyOps = &cgd->thisKeyOps;
+ };
+
+ragel_def_item_list: ragel_def_item_list ragel_def_item;
+ragel_def_item_list: ;
+
+ragel_def_item: tag_alph_type;
+ragel_def_item: tag_getkey_expr;
+ragel_def_item: tag_access_expr;
+ragel_def_item: tag_curstate_expr;
+ragel_def_item: tag_machine;
+ragel_def_item: tag_write;
+
+tag_alph_type: TAG_alphtype '/' TAG_alphtype
+ final { /* The tag content names the alphabet type; setAlphType()
+ * returning false is reported as an unknown type. */
+ if ( ! cgd->setAlphType( $3->tag->content ) )
+ xml_error($1->loc) << "tag <alphtype> specifies unknown alphabet type" << endl;
+ };
+
+tag_getkey_expr: TAG_getkey inline_list '/' TAG_getkey
+ final { /* Store the parsed inline list as the getkey expression. */
+ cgd->getKeyExpr = $2->inlineList;
+ };
+
+tag_access_expr: TAG_access inline_list '/' TAG_access
+ final { /* Store the parsed inline list as the access expression. */
+ cgd->accessExpr = $2->inlineList;
+ };
+
+tag_curstate_expr: TAG_curstate inline_list '/' TAG_curstate
+ final { /* Store the parsed inline list as the current state expression. */
+ cgd->curStateExpr = $2->inlineList;
+ };
+
+tag_write: TAG_write write_option_list '/' TAG_write
+    final {
+        /* The "what" attribute selects which write flag of cgd is set.
+         * Values other than data, init, exec and eof set nothing. */
+        Attribute *whatAttr = $1->tag->findAttr( "what" );
+        if ( whatAttr != 0 ) {
+            char *kind = whatAttr->value;
+            if ( strcmp( kind, "data" ) == 0 )
+                cgd->writeData = true;
+            else if ( strcmp( kind, "init" ) == 0 )
+                cgd->writeInit = true;
+            else if ( strcmp( kind, "exec" ) == 0 )
+                cgd->writeExec = true;
+            else if ( strcmp( kind, "eof" ) == 0 )
+                cgd->writeEOF = true;
+        }
+        else {
+            xml_error($1->loc) << "tag <write> requires a what attribute" << endl;
+        }
+    };
+
+write_option_list: write_option_list tag_option;
+write_option_list: ;
+
+tag_option: TAG_option '/' TAG_option
+    final {
+        /* The tag content names one write option; each known name sets
+         * its bit in cgd->writeOps, anything else draws a warning. */
+        char *optName = $3->tag->content;
+        if ( strcmp( optName, "noend" ) == 0 )
+            cgd->writeOps |= WO_NOEND;
+        else if ( strcmp( optName, "noerror" ) == 0 )
+            cgd->writeOps |= WO_NOERROR;
+        else if ( strcmp( optName, "noprefix" ) == 0 )
+            cgd->writeOps |= WO_NOPREFIX;
+        else if ( strcmp( optName, "nofinal" ) == 0 )
+            cgd->writeOps |= WO_NOFF;
+        else
+            warning() << "unrecognized write option" << endl;
+    };
+
+tag_machine: tag_machine_head machine_item_list '/' TAG_machine
+ final { /* Closing tag seen: all machine items have been processed. */
+ cgd->finishMachine();
+ };
+
+tag_machine_head: TAG_machine
+ final { /* Opening tag seen: runs before any machine item is processed. */
+ cgd->createMachine();
+ };
+
+machine_item_list: machine_item_list machine_item;
+machine_item_list: ;
+
+machine_item: tag_start_state;
+machine_item: tag_entry_points;
+machine_item: tag_state_list;
+machine_item: tag_action_list;
+machine_item: tag_action_table_list;
+machine_item: tag_cond_space_list;
+
+#
+# States.
+#
+
+tag_start_state: TAG_start_state '/' TAG_start_state
+ final { /* The tag content is read as a decimal number and passed to
+ * setStartState(). strtoul() is called without an end pointer, so
+ * malformed content is not detected here. */
+ unsigned long startState = strtoul( $3->tag->content, 0, 10 );
+ cgd->setStartState( startState );
+ };
+
+tag_entry_points: TAG_entry_points entry_point_list '/' TAG_entry_points
+ final { /* The presence of an "error" attribute, whatever its value,
+ * triggers setForcedErrorState(). */
+ Attribute *errorAttr = $1->tag->findAttr( "error" );
+ if ( errorAttr != 0 )
+ cgd->setForcedErrorState();
+ };
+
+entry_point_list: entry_point_list tag_entry;
+entry_point_list: ;
+
+tag_entry: TAG_entry '/' TAG_entry
+    final {
+        /* An entry point pairs the "name" attribute with the decimal
+         * number found in the tag content. */
+        Attribute *nameAttr = $1->tag->findAttr( "name" );
+        if ( nameAttr != 0 ) {
+            unsigned long entry = strtoul( $3->tag->content, 0, 10 );
+            cgd->addEntryPoint( nameAttr->value, entry );
+        }
+        else {
+            xml_error($1->loc) << "tag <entry_points>::<entry> "
+                    "requires a name attribute" << endl;
+        }
+    };
+
+tag_state_list: tag_state_list_head state_list '/' TAG_state_list;
+
+tag_state_list_head: TAG_state_list
+ final { /* Size the state list from the "length" attribute and restart
+ * the state counter. Neither happens when the attribute is missing. */
+ Attribute *lengthAttr = $1->tag->findAttr( "length" );
+ if ( lengthAttr == 0 )
+ xml_error($1->loc) << "tag <state_list> requires a length attribute" << endl;
+ else {
+ unsigned long length = strtoul( lengthAttr->value, 0, 10 );
+ cgd->initStateList( length );
+ curState = 0;
+ }
+ };
+
+state_list: state_list tag_state;
+state_list: ;
+
+tag_state: TAG_state state_item_list '/' TAG_state
+    final {
+        /* The presence of a "final" attribute marks the state as final. */
+        Attribute *finalAttr = $1->tag->findAttr( "final" );
+        if ( finalAttr != 0 )
+            cgd->setFinal( curState );
+
+        /* Move on to the next state index. */
+        curState += 1;
+    };
+
+state_item_list: state_item_list state_item;
+state_item_list: ;
+
+state_item: tag_state_actions;
+state_item: tag_state_cond_list;
+state_item: tag_trans_list;
+
+tag_state_actions: TAG_state_actions '/' TAG_state_actions
+ final { /* The tag content holds three values, read in this order with
+ * readOffsetPtr(): the to-state action, the from-state action and the
+ * EOF action of the current state. */
+ char *ad = $3->tag->content;
+
+ long toStateAction = readOffsetPtr( ad, &ad );
+ long fromStateAction = readOffsetPtr( ad, &ad );
+ long eofAction = readOffsetPtr( ad, &ad );
+
+ cgd->setStateActions( curState, toStateAction,
+ fromStateAction, eofAction );
+ };
+
+tag_state_cond_list: tag_state_cond_list_head state_cond_list '/' TAG_cond_list;
+
+tag_state_cond_list_head: TAG_cond_list
+    final {
+        /* Size the current state's condition list from the "length"
+         * attribute and restart the condition counter. Neither happens
+         * when the attribute is missing. */
+        Attribute *lengthAttr = $1->tag->findAttr( "length" );
+        if ( lengthAttr == 0 )
+            xml_error($1->loc) << "tag <cond_list> requires a length attribute" << endl;
+        else {
+            /* readLength() is declared to return unsigned long; use that
+             * type directly rather than the non-standard ulong typedef. */
+            unsigned long length = readLength( lengthAttr->value );
+            cgd->initStateCondList( curState, length );
+            curStateCond = 0;
+        }
+    };
+
+state_cond_list: state_cond_list state_cond;
+state_cond_list: ;
+
+state_cond: TAG_c '/' TAG_c
+ final { /* The tag content holds, in order, a low key, a high key and a
+ * condition id; they are added as one condition of the current state. */
+ char *td = $3->tag->content;
+ Key lowKey = readKey( td, &td );
+ Key highKey = readKey( td, &td );
+ long condId = readOffsetPtr( td, &td );
+ cgd->addStateCond( curState, lowKey, highKey, condId );
+ };
+
+tag_trans_list: tag_trans_list_head trans_list '/' TAG_trans_list
+ final { /* All transitions of the current state have been read. */
+ cgd->finishTransList( curState );
+ };
+
+tag_trans_list_head: TAG_trans_list
+ final { /* Size the current state's transition list from the "length"
+ * attribute and restart the transition counter. Neither happens when
+ * the attribute is missing. */
+ Attribute *lengthAttr = $1->tag->findAttr( "length" );
+ if ( lengthAttr == 0 )
+ xml_error($1->loc) << "tag <trans_list> requires a length attribute" << endl;
+ else {
+ unsigned long length = strtoul( lengthAttr->value, 0, 10 );
+ cgd->initTransList( curState, length );
+ curTrans = 0;
+ }
+ };
+
+trans_list: trans_list tag_trans;
+trans_list: ;
+
+tag_trans: TAG_t '/' TAG_t
+ final { /* The tag content holds, in order, a low key, a high key, a
+ * target and an action. */
+ char *td = $3->tag->content;
+ Key lowKey = readKey( td, &td );
+ Key highKey = readKey( td, &td );
+ long targ = readOffsetPtr( td, &td );
+ long action = readOffsetPtr( td, &td );
+ /* curTrans is the position of this transition within the current state's list. */
+ cgd->newTrans( curState, curTrans++, lowKey, highKey, targ, action );
+ };
+
+#
+# Action Lists.
+#
+
+tag_action_list: tag_action_list_head action_list '/' TAG_action_list;
+
+tag_action_list_head: TAG_action_list
+ final { /* Size the action list from the "length" attribute and restart
+ * the action counter. Neither happens when the attribute is missing. */
+ Attribute *lengthAttr = $1->tag->findAttr( "length" );
+ if ( lengthAttr == 0 )
+ xml_error($1->loc) << "tag <action_list> requires a length attribute" << endl;
+ else {
+ unsigned long length = strtoul( lengthAttr->value, 0, 10 );
+ cgd->initActionList( length );
+ curAction = 0;
+ }
+ };
+
+action_list: action_list tag_action;
+action_list: ;
+
+#
+# Actions.
+#
+
+tag_action: TAG_action inline_list '/' TAG_action
+ final { /* Register one action. "line" and "col" are required; "name"
+ * is optional and passed on as a null pointer when absent. The action
+ * counter only advances when the action is actually registered. */
+ Attribute *lineAttr = $1->tag->findAttr( "line" );
+ Attribute *colAttr = $1->tag->findAttr( "col" );
+ Attribute *nameAttr = $1->tag->findAttr( "name" );
+ if ( lineAttr == 0 || colAttr == 0)
+ xml_error($1->loc) << "tag <action> requires a line and col attributes" << endl;
+ else {
+ unsigned long line = strtoul( lineAttr->value, 0, 10 );
+ unsigned long col = strtoul( colAttr->value, 0, 10 );
+
+ char *name = 0;
+ if ( nameAttr != 0 )
+ name = nameAttr->value;
+
+ cgd->newAction( curAction++, name, line, col, $2->inlineList );
+ }
+ };
+
+nonterm inline_list
+{
+ InlineList *inlineList;
+};
+
+
+inline_list: inline_list inline_item
+ final {
+ /* Append the item to the list, return the list. The list object of
+ * the left-hand inline_list is reused, not copied. */
+ $1->inlineList->append( $2->inlineItem );
+ $$->inlineList = $1->inlineList;
+ };
+
+inline_list:
+ final {
+ /* Start with empty list. It is allocated here with new; this rule
+ * does not free it. */
+ $$->inlineList = new InlineList;
+ };
+
+nonterm inline_item_type
+{
+ InlineItem *inlineItem;
+};
+
+nonterm inline_item uses inline_item_type;
+
+inline_item: tag_text final { /* Every alternative of inline_item passes the InlineItem built by its tag rule up unchanged. */ $$->inlineItem = $1->inlineItem; };
+inline_item: tag_goto final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_call final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_next final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_goto_expr final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_call_expr final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_next_expr final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_ret final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_break final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_pchar final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_char final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_hold final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_exec final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_holdte final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_execte final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_curs final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_targs final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_il_entry final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_init_tokstart final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_init_act final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_get_tokend final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_set_tokstart final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_set_tokend final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_set_act final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_sub_action final { $$->inlineItem = $1->inlineItem; };
+inline_item: tag_lm_switch final { $$->inlineItem = $1->inlineItem; };
+
+nonterm tag_text uses inline_item_type;
+nonterm tag_goto uses inline_item_type;
+nonterm tag_call uses inline_item_type;
+nonterm tag_next uses inline_item_type;
+nonterm tag_goto_expr uses inline_item_type;
+nonterm tag_call_expr uses inline_item_type;
+nonterm tag_next_expr uses inline_item_type;
+nonterm tag_ret uses inline_item_type;
+nonterm tag_break uses inline_item_type;
+nonterm tag_pchar uses inline_item_type;
+nonterm tag_char uses inline_item_type;
+nonterm tag_hold uses inline_item_type;
+nonterm tag_exec uses inline_item_type;
+nonterm tag_holdte uses inline_item_type;
+nonterm tag_execte uses inline_item_type;
+nonterm tag_curs uses inline_item_type;
+nonterm tag_targs uses inline_item_type;
+nonterm tag_il_entry uses inline_item_type;
+nonterm tag_init_tokstart uses inline_item_type;
+nonterm tag_init_act uses inline_item_type;
+nonterm tag_get_tokend uses inline_item_type;
+nonterm tag_set_tokstart uses inline_item_type;
+nonterm tag_set_tokend uses inline_item_type;
+nonterm tag_set_act uses inline_item_type;
+nonterm tag_sub_action uses inline_item_type;
+nonterm tag_lm_switch uses inline_item_type;
+
+tag_text: TAG_text '/' TAG_text
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Text );
+ $$->inlineItem->data = $3->tag->content;
+ };
+
+tag_goto: TAG_goto '/' TAG_goto
+ final {
+ int targ = strtol( $3->tag->content, 0, 10 );
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Goto );
+ $$->inlineItem->targId = targ;
+ };
+
+tag_call: TAG_call '/' TAG_call
+ final {
+ int targ = strtol( $3->tag->content, 0, 10 );
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Call );
+ $$->inlineItem->targId = targ;
+ };
+
+tag_next: TAG_next '/' TAG_next
+ final {
+ int targ = strtol( $3->tag->content, 0, 10 );
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Next );
+ $$->inlineItem->targId = targ;
+ };
+
+tag_goto_expr: TAG_goto_expr inline_list '/' TAG_goto_expr
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::GotoExpr );
+ $$->inlineItem->children = $2->inlineList;
+ };
+
+tag_call_expr: TAG_call_expr inline_list '/' TAG_call_expr
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::CallExpr );
+ $$->inlineItem->children = $2->inlineList;
+ };
+
+tag_next_expr: TAG_next_expr inline_list '/' TAG_next_expr
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::NextExpr );
+ $$->inlineItem->children = $2->inlineList;
+ };
+
+tag_ret: TAG_ret '/' TAG_ret
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Ret );
+ };
+
+tag_break: TAG_break '/' TAG_break
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Break );
+ };
+
+tag_pchar: TAG_pchar '/' TAG_pchar
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::PChar );
+ };
+
+tag_char: TAG_char '/' TAG_char
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Char );
+ };
+
+tag_hold: TAG_hold '/' TAG_hold
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Hold );
+ };
+
+tag_exec: TAG_exec inline_list '/' TAG_exec
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Exec );
+ $$->inlineItem->children = $2->inlineList;
+ };
+
+tag_holdte: TAG_holdte '/' TAG_holdte
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::HoldTE );
+ };
+
+tag_execte: TAG_execte inline_list '/' TAG_execte
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::ExecTE );
+ $$->inlineItem->children = $2->inlineList;
+ };
+
+tag_curs: TAG_curs '/' TAG_curs
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Curs );
+ };
+
+tag_targs: TAG_targs '/' TAG_targs
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Targs );
+ };
+
+tag_il_entry: TAG_entry '/' TAG_entry
+ final {
+ int targ = strtol( $3->tag->content, 0, 10 );
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::Entry );
+ $$->inlineItem->targId = targ;
+ };
+
+tag_init_tokstart: TAG_init_tokstart '/' TAG_init_tokstart
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::LmInitTokStart );
+ };
+
+tag_init_act: TAG_init_act '/' TAG_init_act
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::LmInitAct );
+ };
+
+tag_get_tokend: TAG_get_tokend '/' TAG_get_tokend
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::LmGetTokEnd );
+ };
+
+tag_set_tokstart: TAG_set_tokstart '/' TAG_set_tokstart
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::LmSetTokStart );
+ cgd->hasLongestMatch = true;
+ };
+
+tag_set_tokend: TAG_set_tokend '/' TAG_set_tokend
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::LmSetTokEnd );
+ $$->inlineItem->offset = strtol( $3->tag->content, 0, 10 );
+ };
+
+tag_set_act: TAG_set_act '/' TAG_set_act
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::LmSetActId );
+ $$->inlineItem->lmId = strtol( $3->tag->content, 0, 10 );
+ };
+
+tag_sub_action: TAG_sub_action inline_list '/' TAG_sub_action
+ final {
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::SubAction );
+ $$->inlineItem->children = $2->inlineList;
+ };
+
+# Action switches.
+tag_lm_switch: TAG_lm_switch lm_action_list '/' TAG_lm_switch
+    final {
+        /* Build the switch item around the collected sub actions. Only the
+         * presence of the handles_error attribute matters; its value is
+         * never inspected. */
+        InlineItem *lmSwitch = new InlineItem( InputLoc(), InlineItem::LmSwitch );
+        lmSwitch->children = $2->inlineList;
+        lmSwitch->handlesError = $1->tag->findAttr( "handles_error" ) != 0;
+        $$->inlineItem = lmSwitch;
+    };
+
+nonterm lm_action_list
+{
+ InlineList *inlineList;
+};
+
+lm_action_list: lm_action_list tag_inline_action
+ final {
+ $$->inlineList = $1->inlineList;
+ $$->inlineList->append( $2->inlineItem );
+ };
+lm_action_list:
+ final {
+ $$->inlineList = new InlineList;
+ };
+
+nonterm tag_inline_action uses inline_item_type;
+
+# A <sub_action> element appearing as an entry of an lm_action_list. It is
+# built like a plain sub action, but may also carry an id attribute.
+tag_inline_action: TAG_sub_action inline_list '/' TAG_sub_action
+ final {
+ /* Wrap the nested inline list in a SubAction item. */
+ $$->inlineItem = new InlineItem( InputLoc(), InlineItem::SubAction );
+ $$->inlineItem->children = $2->inlineList;
+
+ /* The id attribute is optional: lmId is assigned only when it is present. */
+ Attribute *idAttr = $1->tag->findAttr( "id" );
+ if ( idAttr != 0 ) {
+ unsigned long id = strtoul( idAttr->value, 0, 10 );
+ $$->inlineItem->lmId = id;
+ }
+ };
+
+#
+# Lists of Actions.
+#
+
+tag_action_table_list:
+ tag_action_table_list_head action_table_list '/' TAG_action_table_list;
+
+tag_action_table_list_head: TAG_action_table_list
+ final {
+ Attribute *lengthAttr = $1->tag->findAttr( "length" );
+ if ( lengthAttr == 0 ) {
+ xml_error($1->loc) << "tag <action_table_list> requires "
+ "a length attribute" << endl;
+ }
+ else {
+ unsigned long length = strtoul( lengthAttr->value, 0, 10 );
+ cgd->initActionTableList( length );
+ curActionTable = 0;
+ }
+ };
+
+action_table_list: action_table_list tag_action_table;
+action_table_list: ;
+
+# One <action_table> element: a list of action ids that is stored in slot
+# curActionTable of cgd->allActionTables. The length attribute gives the number
+# of ids and is used to size the table before the content is read.
+tag_action_table: TAG_action_table '/' TAG_action_table
+    final {
+        /* Find the length of the action table. */
+        Attribute *lengthAttr = $1->tag->findAttr( "length" );
+        if ( lengthAttr == 0 )
+            xml_error($1->loc) << "tag <at> requires a length attribute" << endl;
+        else {
+            unsigned long length = strtoul( lengthAttr->value, 0, 10 );
+
+            /* Collect the action table. */
+            RedAction *redAct = cgd->allActionTables + curActionTable;
+            redAct->actListId = curActionTable;
+            redAct->key.setAsNew( length );
+            char *ptr = $3->tag->content;
+            int pos = 0;
+            while ( *ptr != 0 ) {
+                /* strtoul leaves the end pointer on its input when it finds
+                 * no digits (trailing blanks, stray text). Stop there instead
+                 * of spinning on the same position forever. */
+                char *next = ptr;
+                unsigned long actionId = strtoul( ptr, &next, 10 );
+                if ( next == ptr )
+                    break;
+
+                /* Only length slots were allocated above; never write past them. */
+                if ( (unsigned long)pos >= length ) {
+                    xml_error($1->loc) << "tag <at> contains more action ids than "
+                            "its length attribute allows" << endl;
+                    break;
+                }
+
+                redAct->key[pos].key = 0;
+                redAct->key[pos].value = cgd->allActions+actionId;
+                pos += 1;
+                ptr = next;
+            }
+
+            /* Insert into the action table map. */
+            cgd->redFsm->actionMap.insert( redAct );
+        }
+
+        /* The slot index advances even when the element was rejected. */
+        curActionTable += 1;
+    };
+
+#
+# Conditions.
+#
+
+tag_cond_space_list: tag_cond_space_list_head cond_space_list '/' TAG_cond_space_list;
+
+tag_cond_space_list_head: TAG_cond_space_list
+ final {
+ Attribute *lengthAttr = $1->tag->findAttr( "length" );
+ if ( lengthAttr == 0 ) {
+ xml_error($1->loc) << "tag <cond_space_list> "
+ "requires a length attribute" << endl;
+ }
+ else {
+ ulong length = readLength( lengthAttr->value );
+ cgd->initCondSpaceList( length );
+ curCondSpace = 0;
+ }
+ };
+
+cond_space_list: cond_space_list tag_cond_space;
+cond_space_list: tag_cond_space;
+
+# One <cond_space> element. Requires both a length and an id attribute. The
+# content is a base key followed by length action offsets.
+tag_cond_space: TAG_cond_space '/' TAG_cond_space
+    final {
+        Attribute *lengthAttr = $1->tag->findAttr( "length" );
+        Attribute *idAttr = $1->tag->findAttr( "id" );
+        if ( lengthAttr == 0 )
+            xml_error($1->loc) << "tag <cond_space> requires a length attribute" << endl;
+        else {
+            /* The id attribute must be checked here: idAttr is dereferenced
+             * below, so testing lengthAttr a second time would let a missing
+             * id through as a null pointer. */
+            if ( idAttr == 0 )
+                xml_error($1->loc) << "tag <cond_space> requires an id attribute" << endl;
+            else {
+                unsigned long condSpaceId = strtoul( idAttr->value, 0, 10 );
+                ulong length = readLength( lengthAttr->value );
+
+                /* The content starts with the base key ... */
+                char *td = $3->tag->content;
+                Key baseKey = readKey( td, &td );
+
+                /* ... followed by length action offsets. */
+                cgd->newCondSpace( curCondSpace, condSpaceId, baseKey );
+                for ( ulong a = 0; a < length; a++ ) {
+                    long actionOffset = readOffsetPtr( td, &td );
+                    cgd->condSpaceItem( curCondSpace, actionOffset );
+                }
+                curCondSpace += 1;
+            }
+        }
+    };
+
+}%%
+
+/* Convert the leading base-10 number in td to an unsigned long. Where the
+ * number ends is of no interest to the callers. */
+unsigned long readLength( char *td )
+{
+    char *unusedEnd = 0;
+    unsigned long length = strtoul( td, &unusedEnd, 10 );
+    return length;
+}
+
+/* Read one base-10 key from td, storing the position just past it in *end.
+ * The global keyOps decides whether the text is converted as a signed or an
+ * unsigned number. */
+Key readKey( char *td, char **end )
+{
+    if ( !keyOps->isSigned )
+        return Key( strtoul( td, end, 10 ) );
+
+    return Key( strtol( td, end, 10 ) );
+}
+
+/* Read one offset from td. Leading spaces and tabs are skipped. A literal 'x'
+ * is consumed and reported as -1; anything else is converted as a base-10
+ * long. When end is non-null (for 'x') or always (for numbers, via strtol),
+ * *end receives the position just past what was read. */
+long readOffsetPtr( char *td, char **end )
+{
+    char *cur = td;
+    for ( ; *cur == ' ' || *cur == '\t'; cur++ )
+        ;
+
+    /* Ordinary number: let strtol do the work and set *end. */
+    if ( *cur != 'x' )
+        return strtol( cur, end, 10 );
+
+    /* The 'x' placeholder. */
+    if ( end != 0 )
+        *end = cur + 1;
+    return -1;
+}
+
+/* Count an error in gblErrorCount and return cerr, already prefixed with the
+ * program name, so the caller can stream the message text after it. */
+ostream &Parser::error()
+{
+ gblErrorCount += 1;
+ cerr << PROGNAME ": ";
+ return cerr;
+}
+
+/* Count an error in gblErrorCount and return cerr prefixed with
+ * "fileName:line:col: " taken from loc, so the caller can stream the message
+ * text after it. Asserts that fileName has been set. */
+ostream &Parser::error( const InputLoc &loc )
+{
+ gblErrorCount += 1;
+ assert( fileName != 0 );
+ cerr << fileName << ":" << loc.line << ":" << loc.col << ": ";
+ return cerr;
+}
+
+/* Count an error and return cerr prefixed with the token's position and,
+ * when the token carries a tag, a note naming that tag (or saying that it is
+ * unknown). tokId is not used. */
+ostream &Parser::parser_error( int tokId, Token &token )
+{
+    gblErrorCount += 1;
+    assert( fileName != 0 );
+
+    /* Position prefix. */
+    cerr << fileName << ":" << token.loc.line << ":" << token.loc.col;
+
+    /* Optional tag description; nothing is added for tokens without a tag. */
+    if ( token.tag != 0 && token.tag->tagId == 0 )
+        cerr << ": at unknown tag";
+    else if ( token.tag != 0 )
+        cerr << ": at tag <" << token.tag->tagId->name << ">";
+
+    cerr << ": ";
+    return cerr;
+}
+
+/* Feed one token to parseLangEl(). A negative result is reported through
+ * parser_error() as "parse error" and the process exits with status 1;
+ * otherwise the result is returned unchanged. */
+int Parser::token( int tokenId, Token &tok )
+{
+ int res = parseLangEl( tokenId, tok );
+ if ( res < 0 ) {
+ parser_error( tokenId, tok ) << "parse error" << endl;
+ exit(1);
+ }
+ return res;
+}
+
+/* Convenience overload: send a token that carries no tag (tok.tag is set
+ * to 0) through token( int, Token & ). */
+int Parser::token( int tokenId )
+{
+ Token tok;
+ tok.tag = 0;
+ return token( tokenId, tok );
+}
+
+/* Send the token(s) for one XML tag found at the given column and line. A
+ * closing tag is sent as a '/' token followed by the tag's own token. Tags
+ * with no tagId are sent as TAG_unknown. */
+int Parser::token( XMLTag *tag, int col, int line )
+{
+ Token tok;
+ tok.loc.col = col;
+ tok.loc.line = line;
+ tok.tag = tag;
+
+ if ( tag->type == XMLTag::Close ) {
+ int res = token( '/', tok );
+ /* token( int, Token & ) exits the process on a negative result, so this
+ * check is purely defensive. */
+ if ( res < 0 )
+ return res;
+ }
+
+ /* NOTE(review): tok.tag is assigned a second time here, presumably in case
+ * sending the '/' token modified tok -- confirm whether this is needed. */
+ tok.tag = tag;
+ return token( tag->tagId != 0 ? tag->tagId->id : TAG_unknown, tok );
+}
diff --git a/rlcodegen/xmlparse.y b/rlcodegen/xmlparse.y
new file mode 100644
index 0000000..a837c87
--- /dev/null
+++ b/rlcodegen/xmlparse.y
@@ -0,0 +1,978 @@
+/*
+ * Copyright 2005-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+%{
+
+#include <iostream>
+#include <stdlib.h>
+#include <limits.h>
+#include <errno.h>
+#include "rlcodegen.h"
+#include "vector.h"
+#include "xmlparse.h"
+#include "gendata.h"
+
+using std::cerr;
+using std::endl;
+
+char *sourceFileName;
+char *attrKey;
+char *attrValue;
+int curAction;
+int curActionTable;
+int curTrans;
+int curState;
+int curCondSpace;
+int curStateCond;
+
+Key readKey( char *td, char **end );
+long readOffsetPtr( char *td, char **end );
+unsigned long readLength( char *td );
+
+CodeGenMap codeGenMap;
+
+%}
+
+%pure-parser
+
+%union {
+ /* General data types. */
+ char c;
+ char *data;
+ int integer;
+ AttrList *attrList;
+
+ /* Inline parse tree items. */
+ InlineItem *ilitem;
+ InlineList *illist;
+}
+
+%token TAG_unknown
+%token TAG_ragel
+%token TAG_ragel_def
+%token TAG_host
+%token TAG_state_list
+%token TAG_state
+%token TAG_trans_list
+%token TAG_t
+%token TAG_machine
+%token TAG_start_state
+%token TAG_action_list
+%token TAG_action_table_list
+%token TAG_action
+%token TAG_action_table
+%token TAG_alphtype
+%token TAG_element
+%token TAG_getkey
+%token TAG_state_actions
+%token TAG_entry_points
+%token TAG_sub_action
+%token TAG_cond_space_list
+%token TAG_cond_space
+%token TAG_cond_list
+%token TAG_c
+
+/* Inline block tokens. */
+%token TAG_text
+%token TAG_goto
+%token TAG_call
+%token TAG_next
+%token TAG_goto_expr
+%token TAG_call_expr
+%token TAG_next_expr
+%token TAG_ret
+%token TAG_pchar
+%token TAG_char
+%token TAG_hold
+%token TAG_exec
+%token TAG_holdte
+%token TAG_execte
+%token TAG_curs
+%token TAG_targs
+%token TAG_entry
+%token TAG_data
+%token TAG_lm_switch
+%token TAG_init_act
+%token TAG_set_act
+%token TAG_set_tokend
+%token TAG_get_tokend
+%token TAG_init_tokstart
+%token TAG_set_tokstart
+%token TAG_write
+%token TAG_curstate
+%token TAG_access
+%token TAG_break
+%token TAG_option
+
+%token <data> XML_Word
+%token <data> XML_Literal
+%type <attrList> AttributeList
+
+%type <illist> InlineList
+%type <ilitem> InlineItem
+%type <illist> LmActionList
+
+%type <ilitem> TagText
+%type <ilitem> TagGoto
+%type <ilitem> TagCall
+%type <ilitem> TagNext
+%type <ilitem> TagGotoExpr
+%type <ilitem> TagCallExpr
+%type <ilitem> TagNextExpr
+%type <ilitem> TagRet
+%type <ilitem> TagBreak
+%type <ilitem> TagPChar
+%type <ilitem> TagChar
+%type <ilitem> TagHold
+%type <ilitem> TagExec
+%type <ilitem> TagHoldTE
+%type <ilitem> TagExecTE
+%type <ilitem> TagCurs
+%type <ilitem> TagTargs
+%type <ilitem> TagIlEntry
+%type <ilitem> TagLmSwitch
+%type <ilitem> TagLmSetActId
+%type <ilitem> TagLmGetTokEnd
+%type <ilitem> TagLmSetTokEnd
+%type <ilitem> TagLmInitTokStart
+%type <ilitem> TagLmInitAct
+%type <ilitem> TagLmSetTokStart
+%type <ilitem> TagInlineAction
+%type <ilitem> TagSubAction
+
+%%
+
+/* Input is any number of input sections. An empty file is accepted. */
+input:
+ TagRagel |
+ /* Nothing */ {
+ /* Assume the frontend died if we get no input. It will emit an error.
+ * Cause us to return an error code. */
+ gblErrorCount += 1;
+ };
+
+TagRagel:
+ TagRagelHead
+ HostOrDefList
+ '<' '/' TAG_ragel '>';
+
+/* Opening <ragel> tag. Records the source file name, selects the host
+ * language from the lang attribute and opens the output. */
+TagRagelHead:
+    '<' TAG_ragel AttributeList '>' {
+        Attribute *fileNameAttr = $3->find( "filename" );
+        if ( fileNameAttr == 0 )
+            xml_error(@2) << "tag <ragel> requires a filename attribute" << endl;
+        else
+            sourceFileName = fileNameAttr->value;
+
+        Attribute *langAttr = $3->find( "lang" );
+        if ( langAttr == 0 )
+            xml_error(@2) << "tag <ragel> requires a lang attribute" << endl;
+        else {
+            if ( strcmp( langAttr->value, "C" ) == 0 ) {
+                hostLangType = CCode;
+                hostLang = &hostLangC;
+            }
+            else if ( strcmp( langAttr->value, "D" ) == 0 ) {
+                hostLangType = DCode;
+                hostLang = &hostLangD;
+            }
+            else if ( strcmp( langAttr->value, "Java" ) == 0 ) {
+                hostLangType = JavaCode;
+                hostLang = &hostLangJava;
+            }
+            else {
+                /* Report any other value instead of silently carrying on
+                 * with hostLangType and hostLang left untouched. */
+                xml_error(@2) << "tag <ragel> lang attribute must be "
+                        "one of C, D or Java" << endl;
+            }
+        }
+
+        /* Eventually more types will be supported. */
+        if ( hostLangType == JavaCode && codeStyle != GenTables ) {
+            error() << "java: only the table code style -T0 is "
+                    "currently supported" << endl;
+        }
+
+        openOutput( sourceFileName );
+    };
+
+AttributeList:
+ AttributeList Attribute {
+ $$ = $1;
+ $$->append( Attribute( attrKey, attrValue ) );
+ } |
+ /* Nothing */ {
+ $$ = new AttrList;
+ };
+
+Attribute:
+ XML_Word '=' XML_Literal {
+ attrKey = $1;
+ attrValue = $3;
+ };
+
+HostOrDefList:
+ HostOrDefList HostOrDef |
+ /* Nothing */;
+
+HostOrDef:
+ TagHost | TagRagelDef;
+
+TagHost:
+ TagHostHead
+ '<' '/' TAG_host '>' {
+ if ( outputFormat == OutCode )
+ *outStream << xmlData.data;
+ };
+
+TagHostHead:
+ '<' TAG_host AttributeList '>' {
+ Attribute *lineAttr = $3->find( "line" );
+ if ( lineAttr == 0 )
+ xml_error(@2) << "tag <host> requires a line attribute" << endl;
+ else {
+ int line = atoi( lineAttr->value );
+ if ( outputFormat == OutCode )
+ lineDirective( *outStream, sourceFileName, line );
+ }
+ };
+
+TagRagelDef:
+ RagelDefHead
+ RagelDefItemList
+ '<' '/' TAG_ragel_def '>' {
+ if ( gblErrorCount == 0 )
+ cgd->generate();
+ };
+
+/* Opening <ragel_def> tag. Selects (or creates) the CodeGenData that the rest
+ * of the definition fills in and resets its write flags. */
+RagelDefHead:
+ '<' TAG_ragel_def AttributeList '>' {
+ /* Passed to the CodeGenData constructor; false only for Graphviz output. */
+ bool wantComplete = outputFormat != OutGraphvizDot;
+
+ char *fsmName = 0;
+ Attribute *nameAttr = $3->find( "name" );
+ if ( nameAttr != 0 ) {
+ fsmName = nameAttr->value;
+
+ /* Named machines are kept in codeGenMap, so a later <ragel_def> with
+ * the same name picks up the existing CodeGenData. */
+ CodeGenMapEl *mapEl = codeGenMap.find( fsmName );
+ if ( mapEl != 0 )
+ cgd = mapEl->value;
+ else {
+ cgd = new CodeGenData( sourceFileName, fsmName, wantComplete );
+ codeGenMap.insert( fsmName, cgd );
+ }
+ }
+ else {
+ /* Unnamed: always a fresh CodeGenData, not entered in the map. */
+ cgd = new CodeGenData( sourceFileName, fsmName, wantComplete );
+ }
+
+ /* Clear the write requests and point the global keyOps at this
+ * machine's key operations. */
+ cgd->writeOps = 0;
+ cgd->writeData = false;
+ cgd->writeInit = false;
+ cgd->writeExec = false;
+ cgd->writeEOF = false;
+ ::keyOps = &cgd->thisKeyOps;
+ };
+
+RagelDefItemList:
+ RagelDefItemList RagelDefItem |
+ /* Nothing */;
+
+RagelDefItem:
+ TagAlphType |
+ TagGetKeyExpr |
+ TagAccessExpr |
+ TagCurStateExpr |
+ TagMachine |
+ TagWrite;
+
+TagWrite:
+ '<' TAG_write AttributeList '>'
+ OptionList
+ '<' '/' TAG_write '>' {
+ Attribute *what = $3->find( "what" );
+ if ( what == 0 ) {
+ xml_error(@2) << "tag <write> requires a what attribute" << endl;
+ }
+ else {
+ if ( strcmp( what->value, "data" ) == 0 )
+ cgd->writeData = true;
+ else if ( strcmp( what->value, "init" ) == 0 )
+ cgd->writeInit = true;
+ else if ( strcmp( what->value, "exec" ) == 0 )
+ cgd->writeExec = true;
+ else if ( strcmp( what->value, "eof" ) == 0 )
+ cgd->writeEOF = true;
+ }
+ };
+
+OptionList:
+ OptionList TagOption |
+ /* Nothing */;
+
+TagOption:
+ '<' TAG_option '>'
+ '<' '/' TAG_option '>' {
+ if ( strcmp( xmlData.data, "noend" ) == 0 )
+ cgd->writeOps |= WO_NOEND;
+ else if ( strcmp( xmlData.data, "noerror" ) == 0 )
+ cgd->writeOps |= WO_NOERROR;
+ else if ( strcmp( xmlData.data, "noprefix" ) == 0 )
+ cgd->writeOps |= WO_NOPREFIX;
+ else if ( strcmp( xmlData.data, "nofinal" ) == 0 )
+ cgd->writeOps |= WO_NOFF;
+ else {
+ warning() << "unrecognized write option" << endl;
+ }
+ };
+
+
+TagAlphType:
+ '<' TAG_alphtype '>'
+ '<' '/' TAG_alphtype '>' {
+ if ( ! cgd->setAlphType( xmlData.data ) )
+ xml_error(@2) << "tag <alphtype> specifies unknown alphabet type" << endl;
+ };
+
+TagGetKeyExpr:
+ '<' TAG_getkey '>'
+ InlineList
+ '<' '/' TAG_getkey '>' {
+ cgd->getKeyExpr = $4;
+ };
+
+TagAccessExpr:
+ '<' TAG_access '>'
+ InlineList
+ '<' '/' TAG_access '>' {
+ cgd->accessExpr = $4;
+ };
+
+TagCurStateExpr:
+ '<' TAG_curstate '>'
+ InlineList
+ '<' '/' TAG_curstate '>' {
+ cgd->curStateExpr = $4;
+ };
+
+TagMachine:
+ TagMachineHead
+ MachineItemList
+ '<' '/' TAG_machine '>' {
+ cgd->finishMachine();
+ };
+
+TagMachineHead:
+ '<' TAG_machine '>' {
+ cgd->createMachine();
+ };
+
+MachineItemList:
+ MachineItemList MachineItem |
+ /* Nothing */;
+
+MachineItem:
+ TagStartState |
+ TagEntryPoints |
+ TagStateList |
+ TagActionList |
+ TagActionTableList |
+ TagCondSpaceList;
+
+TagStartState:
+ '<' TAG_start_state '>'
+ '<' '/' TAG_start_state '>' {
+ unsigned long startState = strtoul( xmlData.data, 0, 10 );
+ cgd->setStartState( startState );
+ };
+
+TagEntryPoints:
+ '<' TAG_entry_points AttributeList '>'
+ EntryPointList
+ '<' '/' TAG_entry_points '>' {
+ Attribute *errorAttr = $3->find( "error" );
+ if ( errorAttr != 0 )
+ cgd->setForcedErrorState();
+ };
+
+EntryPointList:
+ EntryPointList TagEntry |
+ /* Nothing */;
+
+TagEntry:
+ '<' TAG_entry AttributeList '>'
+ '<' '/' TAG_entry '>' {
+ Attribute *nameAttr = $3->find( "name" );
+ if ( nameAttr == 0 )
+ xml_error(@2) << "tag <entry_points>::<entry> requires a name attribute" << endl;
+ else {
+ char *data = xmlData.data;
+ unsigned long entry = strtoul( data, &data, 10 );
+ cgd->addEntryPoint( nameAttr->value, entry );
+ }
+ };
+
+TagStateList:
+ TagStateListHead
+ StateList
+ '<' '/' TAG_state_list '>';
+
+TagStateListHead:
+ '<' TAG_state_list AttributeList '>' {
+ Attribute *lengthAttr = $3->find( "length" );
+ if ( lengthAttr == 0 )
+ xml_error(@2) << "tag <state_list> requires a length attribute" << endl;
+ else {
+ unsigned long length = strtoul( lengthAttr->value, 0, 10 );
+ cgd->initStateList( length );
+ curState = 0;
+ }
+ };
+
+StateList:
+ StateList TagState |
+ /* Nothing */;
+
+TagState:
+ TagStateHead
+ StateItemList
+ '<' '/' TAG_state '>' {
+ curState += 1;
+ };
+
+/* Opening <state> tag: the current state is marked final when a "final"
+ * attribute is present, whatever its value. */
+TagStateHead:
+    '<' TAG_state AttributeList '>' {
+        Attribute *finalAttr = $3->find( "final" );
+        if ( finalAttr != 0 )
+            cgd->setFinal( curState );
+    };
+
+StateItemList:
+ StateItemList StateItem |
+ /* Nothing */;
+
+StateItem:
+ TagStateActions |
+ TagStateCondList |
+ TagTransList;
+
+TagStateActions:
+ '<' TAG_state_actions '>'
+ '<' '/' TAG_state_actions '>' {
+ char *ad = xmlData.data;
+
+ long toStateAction = readOffsetPtr( ad, &ad );
+ long fromStateAction = readOffsetPtr( ad, &ad );
+ long eofAction = readOffsetPtr( ad, &ad );
+
+ cgd->setStateActions( curState, toStateAction,
+ fromStateAction, eofAction );
+ };
+
+TagStateCondList:
+ TagStateCondListHead
+ StateCondList
+ '<' '/' TAG_cond_list '>';
+
+/* Opening <cond_list> tag of a state: sizes the state's condition list from
+ * the required length attribute and restarts the per-state counter. */
+TagStateCondListHead:
+    '<' TAG_cond_list AttributeList '>' {
+        Attribute *lengthAttr = $3->find( "length" );
+        if ( lengthAttr == 0 )
+            xml_error(@2) << "tag <cond_list> requires a length attribute" << endl;
+        else {
+            ulong length = readLength( lengthAttr->value );
+            cgd->initStateCondList( curState, length );
+            curStateCond = 0;
+        }
+    };
+
+StateCondList:
+ StateCondList StateCond |
+ /* Empty */;
+
+/* One <c> element of a state's condition list. The content is read as a low
+ * key, a high key and a condition id, in that order. */
+StateCond:
+    '<' TAG_c '>'
+    '<' '/' TAG_c '>' {
+        char *td = xmlData.data;
+        Key lowKey = readKey( td, &td );
+        Key highKey = readKey( td, &td );
+        long condId = readOffsetPtr( td, &td );
+        cgd->addStateCond( curState, lowKey, highKey, condId );
+    };
+
+TagTransList:
+ TagTransListHead
+ TransList
+ '<' '/' TAG_trans_list '>' {
+ cgd->finishTransList( curState );
+ };
+
+TagTransListHead:
+ '<' TAG_trans_list AttributeList '>' {
+ Attribute *lengthAttr = $3->find( "length" );
+ if ( lengthAttr == 0 )
+ xml_error(@2) << "tag <trans_list> requires a length attribute" << endl;
+ else {
+ unsigned long length = strtoul( lengthAttr->value, 0, 10 );
+ cgd->initTransList( curState, length );
+ curTrans = 0;
+ }
+ };
+
+TransList:
+ TransList TagTrans |
+ /* Nothing */;
+
+TagTrans:
+ '<' TAG_t AttributeList '>'
+ '<' '/' TAG_t '>' {
+ char *td = xmlData.data;
+ Key lowKey = readKey( td, &td );
+ Key highKey = readKey( td, &td );
+ long targ = readOffsetPtr( td, &td );
+ long action = readOffsetPtr( td, &td );
+
+ cgd->newTrans( curState, curTrans++, lowKey, highKey, targ, action );
+ };
+
+TagActionList:
+ TagActionListHead
+ ActionList
+ '<' '/' TAG_action_list '>';
+
+TagActionListHead:
+ '<' TAG_action_list AttributeList '>' {
+ Attribute *lengthAttr = $3->find( "length" );
+ if ( lengthAttr == 0 )
+ xml_error(@2) << "tag <action_list> requires a length attribute" << endl;
+ else {
+ unsigned long length = strtoul( lengthAttr->value, 0, 10 );
+ cgd->initActionList( length );
+ curAction = 0;
+ }
+ };
+
+ActionList:
+ ActionList TagAction |
+ /* Nothing */;
+
+TagAction:
+ '<' TAG_action AttributeList '>'
+ InlineList
+ '<' '/' TAG_action '>' {
+ Attribute *lineAttr = $3->find( "line" );
+ Attribute *colAttr = $3->find( "col" );
+ Attribute *nameAttr = $3->find( "name" );
+ if ( lineAttr == 0 || colAttr == 0)
+ xml_error(@2) << "tag <action> requires a line and col attributes" << endl;
+ else {
+ unsigned long line = strtoul( lineAttr->value, 0, 10 );
+ unsigned long col = strtoul( colAttr->value, 0, 10 );
+
+ char *name = 0;
+ if ( nameAttr != 0 )
+ name = nameAttr->value;
+
+ cgd->newAction( curAction++, name, line, col, $5 );
+ }
+ };
+
+InlineList:
+ InlineList InlineItem {
+ /* Append the item to the list, return the list. */
+ $1->append( $2 );
+ $$ = $1;
+ } |
+ /* Nothing */ {
+ /* Start with empty list. */
+ $$ = new InlineList;
+ };
+
+InlineItem:
+ TagText |
+ TagGoto |
+ TagCall |
+ TagNext |
+ TagGotoExpr |
+ TagCallExpr |
+ TagNextExpr |
+ TagRet |
+ TagBreak |
+ TagPChar |
+ TagChar |
+ TagHold |
+ TagExec |
+ TagHoldTE |
+ TagExecTE |
+ TagCurs |
+ TagTargs |
+ TagIlEntry |
+ TagLmSwitch |
+ TagLmSetActId |
+ TagLmSetTokEnd |
+ TagLmGetTokEnd |
+ TagSubAction |
+ TagLmInitTokStart |
+ TagLmInitAct |
+ TagLmSetTokStart;
+
+TagText:
+ '<' TAG_text AttributeList '>'
+ '<' '/' TAG_text '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::Text );
+ $$->data = strdup(xmlData.data);
+ };
+
+TagGoto:
+ '<' TAG_goto '>'
+ '<' '/' TAG_goto '>' {
+ int targ = strtol( xmlData.data, 0, 10 );
+ $$ = new InlineItem( InputLoc(), InlineItem::Goto );
+ $$->targId = targ;
+ };
+
+TagCall:
+ '<' TAG_call '>'
+ '<' '/' TAG_call '>' {
+ int targ = strtol( xmlData.data, 0, 10 );
+ $$ = new InlineItem( InputLoc(), InlineItem::Call );
+ $$->targId = targ;
+ };
+
+TagNext:
+ '<' TAG_next '>'
+ '<' '/' TAG_next '>' {
+ int targ = strtol( xmlData.data, 0, 10 );
+ $$ = new InlineItem( InputLoc(), InlineItem::Next );
+ $$->targId = targ;
+ };
+
+TagGotoExpr:
+ '<' TAG_goto_expr '>'
+ InlineList
+ '<' '/' TAG_goto_expr '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::GotoExpr );
+ $$->children = $4;
+ };
+
+TagCallExpr:
+ '<' TAG_call_expr '>'
+ InlineList
+ '<' '/' TAG_call_expr '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::CallExpr );
+ $$->children = $4;
+ };
+
+TagNextExpr:
+ '<' TAG_next_expr '>'
+ InlineList
+ '<' '/' TAG_next_expr '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::NextExpr );
+ $$->children = $4;
+ };
+
+TagRet:
+ '<' TAG_ret '>'
+ '<' '/' TAG_ret '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::Ret );
+ };
+
+TagPChar:
+ '<' TAG_pchar '>'
+ '<' '/' TAG_pchar '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::PChar );
+ };
+
+TagChar:
+ '<' TAG_char '>'
+ '<' '/' TAG_char '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::Char );
+ };
+
+TagHold:
+ '<' TAG_hold '>'
+ '<' '/' TAG_hold '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::Hold );
+ };
+
+TagExec:
+ '<' TAG_exec '>'
+ InlineList
+ '<' '/' TAG_exec '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::Exec );
+ $$->children = $4;
+ };
+
+TagHoldTE:
+ '<' TAG_holdte '>'
+ '<' '/' TAG_holdte '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::HoldTE );
+ };
+
+TagExecTE:
+ '<' TAG_execte '>'
+ InlineList
+ '<' '/' TAG_execte '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::ExecTE );
+ $$->children = $4;
+ };
+
+TagCurs:
+ '<' TAG_curs '>'
+ '<' '/' TAG_curs '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::Curs );
+ };
+
+TagTargs:
+ '<' TAG_targs '>'
+ '<' '/' TAG_targs '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::Targs );
+ };
+
+TagIlEntry:
+ '<' TAG_entry '>'
+ '<' '/' TAG_entry '>' {
+ int targ = strtol( xmlData.data, 0, 10 );
+ $$ = new InlineItem( InputLoc(), InlineItem::Entry );
+ $$->targId = targ;
+ };
+
+TagBreak:
+ '<' TAG_break '>'
+ '<' '/' TAG_break '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::Break );
+ };
+
+
+/* <lm_switch> element: an LmSwitch item whose children are the collected
+ * sub actions. Only the presence of the handles_error attribute matters; its
+ * value is never inspected. */
+TagLmSwitch:
+    '<' TAG_lm_switch AttributeList '>'
+    LmActionList
+    '<' '/' TAG_lm_switch '>' {
+        InlineItem *lmSwitch = new InlineItem( InputLoc(), InlineItem::LmSwitch );
+        lmSwitch->children = $5;
+        lmSwitch->handlesError = $3->find( "handles_error" ) != 0;
+        $$ = lmSwitch;
+    };
+
+LmActionList:
+ LmActionList TagInlineAction {
+ $$ = $1;
+ $$->append( $2 );
+ } |
+ /* Nothing */ {
+ $$ = new InlineList;
+ };
+
+TagInlineAction:
+ '<' TAG_sub_action AttributeList '>'
+ InlineList
+ '<' '/' TAG_sub_action '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::SubAction );
+ $$->children = $5;
+
+ Attribute *idAttr = $3->find( "id" );
+ if ( idAttr != 0 ) {
+ unsigned long id = strtoul( idAttr->value, 0, 10 );
+ $$->lmId = id;
+ }
+ };
+
+TagLmSetActId:
+ '<' TAG_set_act '>'
+ '<' '/' TAG_set_act '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::LmSetActId );
+ $$->lmId = strtol( xmlData.data, 0, 10 );
+ };
+
+TagLmGetTokEnd:
+ '<' TAG_get_tokend '>'
+ '<' '/' TAG_get_tokend '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::LmGetTokEnd );
+ };
+
+TagLmSetTokEnd:
+ '<' TAG_set_tokend '>'
+ '<' '/' TAG_set_tokend '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::LmSetTokEnd );
+ $$->offset = strtol( xmlData.data, 0, 10 );
+ };
+
+TagSubAction:
+ '<' TAG_sub_action '>'
+ InlineList
+ '<' '/' TAG_sub_action '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::SubAction );
+ $$->children = $4;
+ };
+
+TagLmInitTokStart:
+ '<' TAG_init_tokstart '>'
+ '<' '/' TAG_init_tokstart '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::LmInitTokStart );
+ };
+
+TagLmInitAct:
+ '<' TAG_init_act '>'
+ '<' '/' TAG_init_act '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::LmInitAct );
+ };
+
+TagLmSetTokStart:
+ '<' TAG_set_tokstart '>'
+ '<' '/' TAG_set_tokstart '>' {
+ $$ = new InlineItem( InputLoc(), InlineItem::LmSetTokStart );
+ cgd->hasLongestMatch = true;
+ };
+
+TagActionTableList:
+ TagActionTableListHead
+ ActionTableList
+ '<' '/' TAG_action_table_list '>';
+
+TagActionTableListHead:
+ '<' TAG_action_table_list AttributeList '>' {
+ Attribute *lengthAttr = $3->find( "length" );
+ if ( lengthAttr == 0 )
+ xml_error(@2) << "tag <action_table_list> requires a length attribute" << endl;
+ else {
+ unsigned long length = strtoul( lengthAttr->value, 0, 10 );
+ cgd->initActionTableList( length );
+ curActionTable = 0;
+ }
+ };
+
+ActionTableList:
+ ActionTableList TagActionTable |
+ /* Nothing */;
+
+/* One <action_table> element: a list of action ids that is stored in slot
+ * curActionTable of cgd->allActionTables. The length attribute gives the
+ * number of ids and is used to size the table before the content is read. */
+TagActionTable:
+    '<' TAG_action_table AttributeList '>'
+    '<' '/' TAG_action_table '>' {
+        /* Find the length of the action table. */
+        Attribute *lengthAttr = $3->find( "length" );
+        if ( lengthAttr == 0 )
+            xml_error(@2) << "tag <at> requires a length attribute" << endl;
+        else {
+            unsigned long length = strtoul( lengthAttr->value, 0, 10 );
+
+            /* Collect the action table. */
+            RedAction *redAct = cgd->allActionTables + curActionTable;
+            redAct->actListId = curActionTable;
+            redAct->key.setAsNew( length );
+            char *ptr = xmlData.data;
+            int pos = 0;
+            while ( *ptr != 0 ) {
+                /* strtoul leaves the end pointer on its input when it finds
+                 * no digits (trailing blanks, stray text). Stop there instead
+                 * of spinning on the same position forever. */
+                char *next = ptr;
+                unsigned long actionId = strtoul( ptr, &next, 10 );
+                if ( next == ptr )
+                    break;
+
+                /* Only length slots were allocated above; never write past them. */
+                if ( (unsigned long)pos >= length ) {
+                    xml_error(@2) << "tag <at> contains more action ids than "
+                            "its length attribute allows" << endl;
+                    break;
+                }
+
+                redAct->key[pos].key = 0;
+                redAct->key[pos].value = cgd->allActions+actionId;
+                pos += 1;
+                ptr = next;
+            }
+
+            /* Insert into the action table map. */
+            cgd->redFsm->actionMap.insert( redAct );
+        }
+
+        /* The slot index advances even when the element was rejected. */
+        curActionTable += 1;
+    };
+
+TagCondSpaceList:
+ TagCondSpaceListHead
+ CondSpaceList
+ '<' '/' TAG_cond_space_list '>';
+
+TagCondSpaceListHead:
+ '<' TAG_cond_space_list AttributeList '>' {
+ Attribute *lengthAttr = $3->find( "length" );
+ if ( lengthAttr == 0 )
+ xml_error(@2) << "tag <cond_space_list> requires a length attribute" << endl;
+ else {
+ ulong length = readLength( lengthAttr->value );
+ cgd->initCondSpaceList( length );
+ curCondSpace = 0;
+ }
+ };
+
+CondSpaceList:
+ CondSpaceList TagCondSpace |
+ TagCondSpace;
+
+/* One <cond_space> element. Requires both a length and an id attribute. The
+ * content is a base key followed by length action offsets. */
+TagCondSpace:
+    '<' TAG_cond_space AttributeList '>'
+    '<' '/' TAG_cond_space '>' {
+        Attribute *lengthAttr = $3->find( "length" );
+        Attribute *idAttr = $3->find( "id" );
+        if ( lengthAttr == 0 )
+            xml_error(@2) << "tag <cond_space> requires a length attribute" << endl;
+        else {
+            /* The id attribute must be checked here: idAttr is dereferenced
+             * below, so testing lengthAttr a second time would let a missing
+             * id through as a null pointer. */
+            if ( idAttr == 0 )
+                xml_error(@2) << "tag <cond_space> requires an id attribute" << endl;
+            else {
+                unsigned long condSpaceId = strtoul( idAttr->value, 0, 10 );
+                ulong length = readLength( lengthAttr->value );
+
+                /* The content starts with the base key ... */
+                char *td = xmlData.data;
+                Key baseKey = readKey( td, &td );
+
+                /* ... followed by length action offsets. */
+                cgd->newCondSpace( curCondSpace, condSpaceId, baseKey );
+                for ( ulong a = 0; a < length; a++ ) {
+                    long actionOffset = readOffsetPtr( td, &td );
+                    cgd->condSpaceItem( curCondSpace, actionOffset );
+                }
+                curCondSpace += 1;
+            }
+        }
+    };
+
+%%
+
+/* Convert the leading base-10 number in td to an unsigned long. Where the
+ * number ends is of no interest to the callers. */
+unsigned long readLength( char *td )
+{
+    char *unusedEnd = 0;
+    unsigned long length = strtoul( td, &unusedEnd, 10 );
+    return length;
+}
+
+/* Read one base-10 key from td, storing the position just past it in *end.
+ * The global keyOps decides whether the text is converted as a signed or an
+ * unsigned number. */
+Key readKey( char *td, char **end )
+{
+    if ( !keyOps->isSigned )
+        return Key( strtoul( td, end, 10 ) );
+
+    return Key( strtol( td, end, 10 ) );
+}
+
+/* Read one offset from td. Leading spaces and tabs are skipped. A literal 'x'
+ * is consumed and reported as -1; anything else is converted as a base-10
+ * long. When end is non-null (for 'x') or always (for numbers, via strtol),
+ * *end receives the position just past what was read. */
+long readOffsetPtr( char *td, char **end )
+{
+    char *cur = td;
+    for ( ; *cur == ' ' || *cur == '\t'; cur++ )
+        ;
+
+    /* Ordinary number: let strtol do the work and set *end. */
+    if ( *cur != 'x' )
+        return strtol( cur, end, 10 );
+
+    /* The 'x' placeholder. */
+    if ( end != 0 )
+        *end = cur + 1;
+    return -1;
+}
+
+/* Error callback for the generated parser: reports err at the position held
+ * in the global yylloc. */
+void yyerror( char *err )
+{
+ /* Bison won't give us the location, but in the last call to the scanner we
+ * saved a pointer to the location variable. Use that instead. */
+ error(::yylloc->first_line, ::yylloc->first_column) << err << endl;
+}
+
diff --git a/rlcodegen/xmlscan.lex b/rlcodegen/xmlscan.lex
new file mode 100644
index 0000000..4ebd70a
--- /dev/null
+++ b/rlcodegen/xmlscan.lex
@@ -0,0 +1,433 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+%{
+
+#define YY_NEVER_INTERACTIVE 1
+//#define WANT_TOKEN_WRITE
+
+#include <iostream>
+#include "vector.h"
+#include "rlcodegen.h"
+#include "xmlparse.h"
+#include "buffer.h"
+
+using std::cout;
+using std::cerr;
+using std::endl;
+
+Buffer tokbuf;
+int builtinBrace = 0;
+bool inlineWhitespace = true;
+bool handlingInclude = false;
+
+YYSTYPE *yylval;
+YYLTYPE *yylloc;
+
+void garble();
+
+void extendToken();
+void extendToken( char *data, int len );
+
+int emitToken( int token, char *data, int len );
+int emitNoData( int token );
+int emitTag( char *data, int len, bool isOpen );
+void passThrough( char *data );
+void popInclude();
+void scannerInit();
+
+enum InlineBlockType {
+ CurlyDelimited,
+ SemiTerminated
+} inlineBlockType;
+
+/* A wrapper yylex() is provided for the parser, so the generated lexer must be declared under a different name. */
+#define YY_DECL int rlcodegen_lex()
+
+class Perfect_Hash
+{
+private:
+ static inline unsigned int hash (const char *str, unsigned int len);
+
+public:
+ static struct XMLTagHashPair *in_word_set (const char *str, unsigned int len);
+};
+
+Vector<bool> shouldEmitXMLData;
+
+int first_line = 1;
+int first_column = 1;
+int last_line = 1;
+int last_column = 0;
+
+Buffer xmlData;
+
+%}
+
+%x OPEN_TAG
+%x CLOSE_TAG1
+%x CLOSE_TAG2
+%x ATTR_LIST
+%x ATTR_LITERAL
+
+WSCHAR [\t\n\v\f\r ]
+IDENT [a-zA-Z_][a-zA-Z_0-9\-]*
+
+%%
+
+ /* The start of a tag in outer data. */
+<INITIAL>"<" {
+ BEGIN(OPEN_TAG);
+ shouldEmitXMLData.prepend( false );
+ return emitNoData( *yytext );
+}
+
+<INITIAL>[^<&]+ {
+ if ( shouldEmitXMLData[0] )
+ xmlData.append( yytext, yyleng );
+ garble();
+}
+<INITIAL>"&amp;" {
+ if ( shouldEmitXMLData[0] )
+ xmlData.append( "&", 1 );
+ garble();
+}
+<INITIAL>"&lt;" {
+ if ( shouldEmitXMLData[0] )
+ xmlData.append( "<", 1 );
+ garble();
+}
+<INITIAL>"&gt;" {
+ if ( shouldEmitXMLData[0] )
+ xmlData.append( ">", 1 );
+ garble();
+}
+
+ /*
+ * Tags
+ */
+
+<OPEN_TAG>"/" {
+ BEGIN(CLOSE_TAG1);
+ xmlData.append(0);
+ return emitNoData( *yytext );
+}
+
+<OPEN_TAG>{IDENT} {
+ BEGIN( ATTR_LIST );
+ return emitTag( yytext, yyleng, true );
+}
+
+<OPEN_TAG,CLOSE_TAG1>{WSCHAR}+ {
+ garble();
+}
+
+<CLOSE_TAG1>{IDENT} {
+ BEGIN( CLOSE_TAG2 );
+ return emitTag( yytext, yyleng, false );
+}
+
+<CLOSE_TAG2>">" {
+ shouldEmitXMLData.remove( 0 );
+ BEGIN(INITIAL);
+ return emitNoData( *yytext );
+}
+
+<ATTR_LIST>{IDENT} {
+ return emitToken( XML_Word, yytext, yyleng );
+}
+
+<ATTR_LIST>\" {
+ BEGIN(ATTR_LITERAL);
+ extendToken();
+}
+<ATTR_LITERAL>\\. extendToken( yytext+1, 1 );
+<ATTR_LITERAL>\\\n extendToken( yytext+1, 1 );
+<ATTR_LITERAL>[^\\"]+ extendToken( yytext, yyleng );
+
+ /* Terminate a double literal */
+<ATTR_LITERAL>\" {
+ BEGIN(ATTR_LIST);
+ return emitToken( XML_Literal, 0, 0 );
+}
+
+<ATTR_LIST>{WSCHAR}+ {
+ garble();
+}
+
+<ATTR_LIST>">" {
+ BEGIN(INITIAL);
+ return emitNoData( *yytext );
+}
+
+<ATTR_LIST>. {
+ return emitNoData( *yytext );
+}
+
+%%
+
+/* Write out token data, escaping special characters. */
+#ifdef WANT_TOKEN_WRITE
+void writeToken( int token, char *data )
+{
+ cout << "token id " << token << " at " << id->fileName << ":" <<
+ yylloc->first_line << ":" << yylloc->first_column << "-" <<
+ yylloc->last_line << ":" << yylloc->last_column << " ";
+
+ if ( data != 0 ) {
+ while ( *data != 0 ) {
+ switch ( *data ) {
+ case '\n': cout << "\\n"; break;
+ case '\t': cout << "\\t"; break;
+ default: cout << *data; break;
+ }
+ data += 1;
+ }
+ }
+ cout << endl;
+}
+#endif
+
+/* Calculate line info from yytext. Called on every pattern match. */
+void updateLineInfo()
+{
+ /* yytext should always have at least one char. */
+ assert( yytext[0] != 0 );
+
+ /* Scan through yytext up to the last character. */
+ char *p = yytext;
+ for ( ; p[1] != 0; p++ ) {
+ if ( p[0] == '\n' ) {
+ last_line += 1;
+ last_column = 0;
+ }
+ else {
+ last_column += 1;
+ }
+ }
+
+ /* Always consider the last character as not a newline. Newlines at the
+ * end of a token are as any old character at the end of the line. */
+ last_column += 1;
+
+ /* The caller may be about to emit a token, be prepared to pass the line
+ * info to the parser. */
+ yylloc->first_line = first_line;
+ yylloc->first_column = first_column;
+ yylloc->last_line = last_line;
+ yylloc->last_column = last_column;
+
+ /* If the last character was indeed a newline, then wrap ahead now. */
+ if ( p[0] == '\n' ) {
+ last_line += 1;
+ last_column = 0;
+ }
+}
+
+
+/* Eat up a matched pattern that will not be part of a token. */
+void garble()
+{
+ /* Update line information from yytext. */
+ updateLineInfo();
+
+ /* The next token starts ahead of the last token. */
+ first_line = last_line;
+ first_column = last_column + 1;
+}
+
+/* Extend a token, but don't add any data to it, more token data expected. */
+void extendToken()
+{
+ /* Update line information from yytext. */
+ updateLineInfo();
+}
+
+/* Append data to the end of the token. More token data expected. */
+void extendToken( char *data, int len )
+{
+ if ( data != 0 && len > 0 )
+ tokbuf.append( data, len );
+
+ /* Update line information from yytext. */
+ updateLineInfo();
+}
+
+
+/* Append data to the end of the token and emit it to the parser. */
+int emitToken( int token, char *data, int len )
+{
+ /* Append the data and null terminate. */
+ if ( data != 0 && len > 0 )
+ tokbuf.append( data, len );
+ tokbuf.append( 0 );
+
+ /* Duplicate the buffer for the parser. NOTE(review): assumes tokbuf.length counts the null appended above -- confirm in buffer.h. */
+ yylval->data = new char[tokbuf.length];
+ strcpy( yylval->data, tokbuf.data );
+
+ /* Update line information from yytext. */
+ updateLineInfo();
+
+ /* Write token info. */
+#ifdef WANT_TOKEN_WRITE
+ writeToken( token, tokbuf.data );
+#endif
+
+ /* Clear out the buffer. */
+ tokbuf.clear();
+
+ /* The next token starts ahead of the last token. */
+ first_line = last_line;
+ first_column = last_column + 1;
+
+ return token;
+}
+
+/* Map a tag name to its token id and emit it; content-carrying open tags also start XML data capture. */
+int emitTag( char *data, int len, bool isOpen )
+{
+ /* Look up the tag. Names not found in the hash keep TAG_unknown. */
+ int token = TAG_unknown;
+
+ XMLTagHashPair *tag = Perfect_Hash::in_word_set( data, len );
+ if ( tag != 0 )
+ token = tag->id;
+
+ if ( isOpen ) {
+ switch ( token ) {
+ case TAG_host: case TAG_t: case TAG_start_state:
+ case TAG_action_table:
+ case TAG_alphtype: case TAG_state_actions:
+ case TAG_entry_points:
+ case TAG_text: case TAG_goto:
+ case TAG_call: case TAG_next:
+ case TAG_set_act: case TAG_set_tokend:
+ case TAG_entry: case TAG_option:
+ case TAG_cond_space: case TAG_c:
+ shouldEmitXMLData[0] = true; /* The INITIAL rules append to xmlData while this flag is set. */
+ xmlData.clear(); /* Start this tag's content from an empty buffer. */
+ }
+ }
+
+ return emitToken( token, data, len );
+}
+
+/* Emit a token with no data to the parser. */
+int emitNoData( int token )
+{
+ /* Return null to the parser. */
+ yylval->data = 0;
+
+ /* Update line information from yytext. */
+ updateLineInfo();
+
+ /* Write token info. */
+#ifdef WANT_TOKEN_WRITE
+ writeToken( token, 0 );
+#endif
+
+ /* Clear out the buffer. */
+ tokbuf.clear();
+
+ /* The next token starts ahead of the last token. */
+ first_line = last_line;
+ first_column = last_column + 1;
+
+ return token;
+}
+
+/* Pass tokens in outer code through to the output. */
+void passThrough( char *data )
+{
+ /* If there are no errors and we are emitting code, then write out the
+ * data. NOTE(review): no include-stack depth is checked here, only the
+ * error count and the output format. */
+ if ( gblErrorCount == 0 && outputFormat == OutCode )
+ *outStream << data;
+}
+
+/* Init a buffer. */
+Buffer::Buffer()
+:
+ data(0),
+ length(0),
+ allocated(0)
+{
+}
+
+/* Empty out a buffer on destruction. */
+Buffer::~Buffer()
+{
+ empty();
+}
+
+/* Free the space allocated for the buffer. */
+void Buffer::empty()
+{
+ if ( data != 0 ) {
+ free( data );
+
+ data = 0;
+ length = 0;
+ allocated = 0;
+ }
+}
+
+/* (Re)allocate the buffer's storage to len bytes. */
+void Buffer::upAllocate( int len )
+{
+ if ( data == 0 )
+ data = (char*) malloc( len );
+ else
+ data = (char*) realloc( data, len ); /* NOTE(review): neither allocation result is checked for null. */
+ allocated = len;
+}
+
+int yywrap()
+{
+ /* Once processing of the input is done, signal no more. */
+ return 1;
+}
+
+/* Here simply to suppress the unused yyunput warning. */
+void thisFuncIsNeverCalled()
+{
+ yyunput(0, 0);
+}
+
+void scannerInit()
+{
+ /* Set this up in case we are initially given something other
+ * than an opening tag. */
+ shouldEmitXMLData.prepend( false );
+}
+
+/* Wrapper for the lexer which stores the locations of the value and location
+ * variables of the parser into globals. The parser is reentrant, however the scanner
+ * does not need to be, so globals work fine. This saves us passing them around
+ * all the helper functions. */
+int yylex( YYSTYPE *yylval, YYLTYPE *yylloc )
+{
+ ::yylval = yylval;
+ ::yylloc = yylloc;
+ return rlcodegen_lex();
+}
diff --git a/rlcodegen/xmlscan.rl b/rlcodegen/xmlscan.rl
new file mode 100644
index 0000000..926c211
--- /dev/null
+++ b/rlcodegen/xmlscan.rl
@@ -0,0 +1,333 @@
+/*
+ * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <iostream>
+#include <string.h>
+#include "vector.h"
+#include "xmlparse.h"
+#include "rlcodegen.h"
+#include "buffer.h"
+
+using std::istream;
+using std::cout;
+using std::endl;
+
+#define BUFSIZE 4096
+
+%%{
+ machine Scanner;
+ write data;
+}%%
+
+class Perfect_Hash
+{
+private:
+ static inline unsigned int hash (const char *str, unsigned int len);
+
+public:
+ static struct XMLTagHashPair *in_word_set (const char *str, unsigned int len);
+};
+
+struct Scanner
+{
+ Scanner( istream &input ) :
+ input(input),
+ curline(1),
+ curcol(1),
+ p(0), pe(0),
+ done(false),
+ data(0), data_len(0),
+ value(0)
+ {
+ %%{
+ machine Scanner;
+ write init;
+ }%%
+ }
+
+ int scan();
+ void adjustAttrPointers( int distance );
+
+ istream &input;
+
+ /* Scanner State. */
+ int cs, act, have, curline, curcol;
+ char *tokstart, *tokend;
+ char *p, *pe;
+ int done;
+
+ /* Token data */
+ char *data;
+ int data_len;
+ int value;
+ AttrMkList attrMkList;
+ Buffer buffer;
+ char *tag_id_start;
+ int tag_id_len;
+ int token_col, token_line;
+
+ char buf[BUFSIZE];
+};
+
+
+#define TK_NO_TOKEN (-1)
+#define TK_ERR 1
+#define TK_EOF 2
+#define TK_OpenTag 3
+#define TK_CloseTag 4
+
+#define ret_tok( _tok ) token = (_tok); data = tokstart
+
+void Scanner::adjustAttrPointers( int distance )
+{ /* Move every recorded attribute marker back by distance bytes; scan() calls this after shifting the buffer. */
+ for ( AttrMkList::Iter attr = attrMkList; attr.lte(); attr++ ) {
+ attr->id -= distance;
+ attr->value -= distance;
+ }
+}
+
+int Scanner::scan( )
+{
+ int token = TK_NO_TOKEN;
+ int space, readlen;
+ char *attr_id_start;
+ char *attr_value_start;
+ int attr_id_len;
+ int attr_value_len;
+
+ attrMkList.empty();
+ buffer.clear();
+
+ while ( 1 ) {
+ if ( p == pe ) {
+ //printf("scanner: need more data\n");
+
+ if ( tokstart == 0 )
+ have = 0;
+ else {
+ /* There is data that needs to be shifted over. */
+ //printf("scanner: buffer broken mid token\n");
+ have = pe - tokstart;
+ memmove( buf, tokstart, have );
+
+ int distance = tokstart - buf;
+ tokend -= distance;
+ tag_id_start -= distance;
+ attr_id_start -= distance;
+ attr_value_start -= distance;
+ adjustAttrPointers( distance );
+ tokstart = buf;
+ }
+
+ p = buf + have;
+ space = BUFSIZE - have;
+
+ if ( space == 0 ) {
+ /* We filled up the buffer trying to scan a token. */
+ //printf("scanner: out of buffer space, you have a really long tag\n");
+ return TK_ERR;
+ }
+
+ if ( done ) {
+ //printf("scanner: end of file\n");
+ p[0] = 0;
+ readlen = 1;
+ }
+ else {
+ input.read( p, space );
+ readlen = input.gcount();
+ if ( input.eof() ) {
+ //printf("scanner: setting done flag\n");
+ done = 1;
+ }
+ }
+
+ pe = p + readlen;
+ }
+
+ /* There is no claim that this is a proper XML parser, but it is good
+ * enough for our purposes. */
+ %%{
+ machine Scanner;
+
+ action colup { curcol++; }
+ action start_tok { token_col = curcol; token_line = curline; }
+ NL = '\n' @{ curcol = 0; curline++; };
+
+ WS = [\r\t ] | NL;
+ id = [_a-zA-Z][_a-zA-Z0-9]*;
+ literal = '"' ( [^"] | NL )* '"';
+
+ # Attribute identifiers.
+ action start_attr_id { attr_id_start = p; }
+ action leave_attr_id { attr_id_len = p - attr_id_start; }
+
+ attr_id = id >start_attr_id %leave_attr_id;
+
+ # Attribute values
+ action start_attr_value { attr_value_start = p; }
+ action leave_attr_value
+ {
+ attr_value_len = p - attr_value_start;
+
+ AttrMarker newAttr;
+ newAttr.id = attr_id_start;
+ newAttr.idLen = attr_id_len;
+ newAttr.value = attr_value_start;
+ newAttr.valueLen = attr_value_len;
+ attrMkList.append( newAttr );
+ }
+
+ attr_value = literal >start_attr_value %leave_attr_value;
+
+ # Attribute list.
+ attribute = attr_id WS* '=' WS* attr_value WS*;
+
+ # Tag identifiers.
+ action tag_id_start { tag_id_start = p; }
+ action leave_tag_id { tag_id_len = p - tag_id_start; }
+
+ tag_id = id >tag_id_start %leave_tag_id;
+
+ main := |*
+ # Tags
+ ( '<' WS* tag_id ( WS+ attribute* )? '>' ) >start_tok $colup
+ => { ret_tok( TK_OpenTag ); fbreak; };
+
+ ( '<' WS* '/' WS* tag_id WS* '>' ) >start_tok $colup
+ => { ret_tok( TK_CloseTag ); fbreak; };
+
+ # Data in between tags.
+ ( [^<&\0] | NL ) $colup
+ => { buffer.append( *p ); };
+
+ # Specials.
+ "&amp;" $colup
+ => { buffer.append( '&' ); };
+ "&lt;" $colup
+ => { buffer.append( '<' ); };
+ "&gt;" $colup
+ => { buffer.append( '>' ); };
+
+ # EOF
+ 0 >start_tok => { ret_tok( TK_EOF ); fbreak; };
+
+ *|;
+
+ write exec;
+ }%%
+
+ if ( cs == Scanner_error )
+ return TK_ERR;
+
+ if ( token != TK_NO_TOKEN ) {
+ /* fbreak does not advance p, so we do it manually. */
+ p = p + 1;
+ data_len = p - data;
+ return token;
+ }
+ }
+}
+
+
+int xml_parse( istream &input, char *fileName )
+{
+ Scanner scanner( input );
+ Parser parser( fileName );
+
+ parser.init();
+
+ while ( 1 ) {
+ int token = scanner.scan();
+ if ( token == TK_EOF ) {
+ //cout << "parser_driver: EOF" << endl;
+ parser.token( _eof );
+ break;
+ }
+ else if ( token == TK_ERR ) {
+ //cout << "parser_driver: ERR" << endl;
+ break; /* NOTE(review): the function still returns 0 after a scanner error. */
+ }
+ else {
+ /* All other tokens are either open or close tags. */
+ XMLTagHashPair *tagId = Perfect_Hash::in_word_set(
+ scanner.tag_id_start, scanner.tag_id_len );
+
+ XMLTag *tag = new XMLTag( tagId, token == TK_OpenTag ? /* NOTE(review): never freed here; presumably the parser takes ownership -- confirm. */
+ XMLTag::Open : XMLTag::Close );
+
+ if ( tagId != 0 ) {
+ /* Get attributes for open tags. */
+ if ( token == TK_OpenTag && scanner.attrMkList.length() > 0 ) {
+ tag->attrList = new AttrList;
+ for ( AttrMkList::Iter attr = scanner.attrMkList;
+ attr.lte(); attr++ )
+ {
+ Attribute newAttr;
+ newAttr.id = new char[attr->idLen+1];
+ memcpy( newAttr.id, attr->id, attr->idLen );
+ newAttr.id[attr->idLen] = 0;
+
+ /* Exclude the surrounding quotes: valueLen counts both, so valueLen-2 chars plus a terminator are copied. */
+ newAttr.value = new char[attr->valueLen-1];
+ memcpy( newAttr.value, attr->value+1, attr->valueLen-2 );
+ newAttr.value[attr->valueLen-2] = 0;
+
+ tag->attrList->append( newAttr );
+ }
+ }
+
+ /* Closing tags of these kinds get a copy of the data the scanner buffered during this scan() call. */
+ if ( token == TK_CloseTag ) {
+ switch ( tagId->id ) {
+ case TAG_host: case TAG_option:
+ case TAG_t: case TAG_alphtype:
+ case TAG_text: case TAG_goto:
+ case TAG_call: case TAG_next:
+ case TAG_entry: case TAG_set_tokend:
+ case TAG_set_act: case TAG_start_state:
+ case TAG_state_actions: case TAG_action_table:
+ case TAG_cond_space: case TAG_c:
+ tag->content = new char[scanner.buffer.length+1];
+ memcpy( tag->content, scanner.buffer.data,
+ scanner.buffer.length );
+ tag->content[scanner.buffer.length] = 0;
+ break;
+ }
+ }
+ }
+
+ #if 0
+ cout << "parser_driver: " << (tag->type == XMLTag::Open ? "open" : "close") <<
+ ": " << tag->tagId->name << endl;
+ if ( tag->attrList != 0 ) {
+ for ( AttrList::Iter attr = *tag->attrList; attr.lte(); attr++ )
+ cout << " " << attr->id << ": " << attr->value << endl;
+ }
+ if ( tag->content != 0 )
+ cout << " content: " << tag->content << endl;
+ #endif
+
+ parser.token( tag, scanner.token_col, scanner.token_line );
+ }
+ }
+
+ return 0;
+}
diff --git a/rlcodegen/xmltags.gperf b/rlcodegen/xmltags.gperf
new file mode 100644
index 0000000..2554e24
--- /dev/null
+++ b/rlcodegen/xmltags.gperf
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2005 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+%{
+#include <string.h>
+#include "xmlparse.h"
+%}
+%compare-strncmp
+struct XMLTagHashPair;
+%%
+ragel, TAG_ragel
+ragel_def, TAG_ragel_def
+host, TAG_host
+state_list, TAG_state_list
+state, TAG_state
+trans_list, TAG_trans_list
+t, TAG_t
+machine, TAG_machine
+start_state, TAG_start_state
+action_list, TAG_action_list
+action, TAG_action
+action_table_list, TAG_action_table_list
+action_table, TAG_action_table
+alphtype, TAG_alphtype
+getkey, TAG_getkey
+state_actions, TAG_state_actions
+entry_points, TAG_entry_points
+text, TAG_text
+goto, TAG_goto
+call, TAG_call
+next, TAG_next
+goto_expr, TAG_goto_expr
+call_expr, TAG_call_expr
+next_expr, TAG_next_expr
+ret, TAG_ret
+pchar, TAG_pchar
+char, TAG_char
+hold, TAG_hold
+exec, TAG_exec
+holdte, TAG_holdte
+execte, TAG_execte
+curs, TAG_curs
+targs, TAG_targs
+entry, TAG_entry
+data, TAG_data
+lm_switch, TAG_lm_switch
+sub_action, TAG_sub_action
+init_act, TAG_init_act
+set_act, TAG_set_act
+get_tokend, TAG_get_tokend
+set_tokend, TAG_set_tokend
+init_tokstart, TAG_init_tokstart
+set_tokstart, TAG_set_tokstart
+write, TAG_write
+curstate, TAG_curstate
+access, TAG_access
+break, TAG_break
+option, TAG_option
+cond_space_list, TAG_cond_space_list
+cond_space, TAG_cond_space
+cond_list, TAG_cond_list
+c, TAG_c
diff --git a/test/Makefile.in b/test/Makefile.in
new file mode 100644
index 0000000..4dafdc0
--- /dev/null
+++ b/test/Makefile.in
@@ -0,0 +1,30 @@
+#
+# Copyright 2002-2006 Adrian Thurston <thurston@cs.queensu.ca>
+#
+
+# This file is part of Ragel.
+#
+# Ragel is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Ragel is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ragel; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+test:
+ @./runtests
+
+clean:
+ rm -f *.c *.cpp *.m *.d *.java *.bin *.class *.exp \
+ *.out *_c.rl *_d.rl *_java.rl
+
+distclean: clean
+ rm -f Makefile
diff --git a/test/README b/test/README
new file mode 100644
index 0000000..beb0c03
--- /dev/null
+++ b/test/README
@@ -0,0 +1,13 @@
+
+The test suite now depends on TXL. Since the trend in Ragel is towards
+independence of the host-language, tests are now being written in a fictional
+mini-language designed for the purpose of testing ragel. The host language
+test-cases are then generated using a TXL transformation. This allows one test
+case to be run against all host languages in addition to all code generation
+styles.
+
+TXL is not open source, but a free download is available from the homepage.
+
+http://www.txl.ca/
+
+-Adrian
diff --git a/test/atoi1.rl b/test/atoi1.rl
new file mode 100644
index 0000000..60fc959
--- /dev/null
+++ b/test/atoi1.rl
@@ -0,0 +1,69 @@
+/*
+ * @LANG: indep
+ */
+bool neg;
+int val;
+%%
+val = 0;
+neg = false;
+%%{
+ machine AtoI;
+
+ action begin {
+ neg = false;
+ val = 0;
+ }
+
+ action see_neg {
+ neg = true;
+ }
+
+ action add_digit {
+ val = val * 10 + (fc - '0');
+ }
+
+ action finish {
+ if ( neg ) {
+ val = -1 * val;
+ }
+ }
+ action print {
+ printi val;
+ prints "\n";
+ }
+
+ atoi = (
+ ('-'@see_neg | '+')? (digit @add_digit)+
+ ) >begin %finish;
+
+ main := atoi '\n' @print;
+}%%
+/* _____INPUT_____
+"1\n"
+"12\n"
+"222222\n"
+"+2123\n"
+"213 3213\n"
+"-12321\n"
+"--123\n"
+"-99\n"
+" -3000\n"
+_____INPUT_____ */
+
+/* _____OUTPUT_____
+1
+ACCEPT
+12
+ACCEPT
+222222
+ACCEPT
+2123
+ACCEPT
+FAIL
+-12321
+ACCEPT
+FAIL
+-99
+ACCEPT
+FAIL
+_____OUTPUT_____ */
diff --git a/test/atoi2.rl b/test/atoi2.rl
new file mode 100644
index 0000000..e16380a
--- /dev/null
+++ b/test/atoi2.rl
@@ -0,0 +1,81 @@
+/*
+ * @LANG: indep
+ * This implements an atoi machine using the statechart paradigm.
+ */
+bool neg;
+int val;
+%%
+val = 0;
+neg = false;
+%%{
+ machine StateChart;
+
+ action begin {
+ neg = false;
+ val = 0;
+ }
+
+ action see_neg {
+ neg = true;
+ }
+
+ action add_digit {
+ val = val * 10 + (fc - '0');
+ }
+
+ action finish {
+ if ( neg )
+ val = -1 * val;
+ }
+
+ atoi = (
+ start: (
+ '-' @see_neg ->om_num |
+ '+' ->om_num |
+ [0-9] @add_digit ->more_nums
+ ),
+
+ # One or more nums.
+ om_num: (
+ [0-9] @add_digit ->more_nums
+ ),
+
+ # Zero or more nums.
+ more_nums: (
+ [0-9] @add_digit ->more_nums |
+ '' -> final
+ )
+ ) >begin %finish;
+
+ action oneof { printi val; prints "\n"; }
+ main := ( atoi '\n' @oneof )*;
+}%%
+/* _____INPUT_____
+"1\n"
+"12\n"
+"222222\n"
+"+2123\n"
+"213 3213\n"
+"-12321\n"
+"--123\n"
+"-99\n"
+" -3000\n"
+_____INPUT_____ */
+
+/* _____OUTPUT_____
+1
+ACCEPT
+12
+ACCEPT
+222222
+ACCEPT
+2123
+ACCEPT
+FAIL
+-12321
+ACCEPT
+FAIL
+-99
+ACCEPT
+FAIL
+_____OUTPUT_____ */
diff --git a/test/awkemu.rl b/test/awkemu.rl
new file mode 100644
index 0000000..a988651
--- /dev/null
+++ b/test/awkemu.rl
@@ -0,0 +1,157 @@
+/*
+ * @LANG: c
+ */
+
+/*
+ * Emulate the basic parser of the awk program. Breaks lines up into
+ * words and prints the words.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#define LINEBUF 2048
+static char lineBuf[LINEBUF];
+static char blineBuf[LINEBUF];
+static int lineLen;
+static int blineLen;
+static int words;
+
+void finishLine();
+
+struct awkemu
+{
+ int cs;
+};
+
+%%{
+ machine awkemu;
+
+ variable curstate fsm->cs;
+
+ # Starts a line. Will initialize all the data necessary for capturing the line.
+ action startline {
+ lineLen = 0;
+ blineLen = 0;
+ words = 0;
+ }
+
+ # Will be executed on every character seen in a word. Captures the word
+ # to the broken up line buffer.
+ action wordchar {
+ blineBuf[blineLen++] = fc;
+ }
+
+ # Terminate a word. Adds the null after the word and increments the word count
+ # for the line.
+ action termword {
+ blineBuf[blineLen++] = 0;
+ words += 1;
+ }
+
+ # Will be executed on every character seen in a line (not including
+ # the newline itself).
+ action linechar {
+ lineBuf[lineLen++] = fc;
+ }
+
+ # This section of the machine deals with breaking up lines into fields.
+ # Lines are split on whitespace and put in an array of words.
+
+ # Words in a line.
+ word = (extend - [ \t\n])+;
+
+ # The whitespace separating words in a line.
+ whitespace = [ \t];
+
+ # The components in a line to break up. Either a word or a single char of
+ # whitespace. On the word capture characters.
+ blineElements = word $wordchar %termword | whitespace;
+
+ # Star the break line elements. Just be careful to decrement the leaving
+ # priority as we don't want multiple character identifiers to be treated as
+ # multiple single char identifiers.
+ breakLine = ( blineElements $1 %0 )* . '\n';
+
+ # This machine lets us capture entire lines. We do it separate from the words
+ # in a line.
+ bufLine = (extend - '\n')* $linechar %{ finishLine(); } . '\n';
+
+ # A line can then consist of the machine that will break up the line into
+ # words and a machine that will buffer the entire line.
+ line = ( breakLine | bufLine ) > startline;
+
+ # Any number of lines.
+ main := line*;
+}%%
+
+void finishLine()
+{
+ int i;
+ char *pword = blineBuf;
+ lineBuf[lineLen] = 0;
+ printf("endline(%i): %s\n", words, lineBuf );
+ for ( i = 0; i < words; i++ ) {
+ printf(" word: %s\n", pword );
+ pword += strlen(pword) + 1;
+ }
+}
+
+%% write data;
+
+void awkemu_init( struct awkemu *fsm )
+{
+ %% write init;
+}
+
+void awkemu_execute( struct awkemu *fsm, const char *_data, int _len )
+{
+ const char *p = _data;
+ const char *pe = _data+_len;
+ %% write exec;
+}
+
+int awkemu_finish( struct awkemu *fsm )
+{
+ %% write eof;
+
+ if ( fsm->cs == awkemu_error )
+ return -1;
+ if ( fsm->cs >= awkemu_first_final )
+ return 1;
+ return 0;
+}
+
+#include <stdio.h>
+#define BUFSIZE 2048
+
+struct awkemu fsm;
+char buf[BUFSIZE];
+
+void test( char *buf )
+{
+ int len = strlen( buf );
+ awkemu_init( &fsm );
+ awkemu_execute( &fsm, buf, len );
+ if ( awkemu_finish( &fsm ) > 0 )
+ printf("ACCEPT\n");
+ else
+ printf("FAIL\n");
+}
+
+int main()
+{
+ test( "" );
+ test( "one line with no newline" );
+ test( "one line\n" );
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+ACCEPT
+FAIL
+endline(2): one line
+ word: one
+ word: line
+ACCEPT
+#endif
diff --git a/test/builtin.rl b/test/builtin.rl
new file mode 100644
index 0000000..626927b
--- /dev/null
+++ b/test/builtin.rl
@@ -0,0 +1,1209 @@
+/*
+ * @LANG: c
+ */
+
+#include <stdio.h>
+
+void alph(const char *type)
+{
+ printf("%s\n", type);
+}
+
+struct builtin
+{
+ int cs;
+};
+
+%%{
+ machine builtin;
+ alphtype unsigned int;
+ variable curstate fsm->cs;
+
+ main := (
+ any @{alph("any");} |
+ ascii @{alph("ascii");} |
+ extend @{alph("extend");} |
+ alpha @{alph("alpha");} |
+ digit @{alph("digit");} |
+ alnum @{alph("alnum");} |
+ lower @{alph("lower");} |
+ upper @{alph("upper");} |
+ cntrl @{alph("cntrl");} |
+ graph @{alph("graph");} |
+ print @{alph("print");} |
+ punct @{alph("punct");} |
+ space @{alph("space");} |
+ xdigit @{alph("xdigit");}
+ )*;
+}%%
+
+%% write data;
+
+void builtin_init( struct builtin *fsm )
+{
+ %% write init;
+}
+
+void builtin_execute( struct builtin *fsm, const unsigned int *data, int len )
+{
+ const unsigned int *p = data;
+ const unsigned int *pe = data+len;
+ %% write exec;
+}
+
+int builtin_finish( struct builtin *fsm )
+{
+ if ( fsm->cs == builtin_error )
+ return -1;
+ else if ( fsm->cs >= builtin_first_final )
+ return 1;
+ return 0;
+}
+
+#include <stdio.h>
+#define BUFSIZE 2048
+
+struct builtin fsm;
+char buf[BUFSIZE];
+unsigned int i;
+
+int test( const unsigned int *data, int len )
+{
+ builtin_init( &fsm );
+ builtin_execute( &fsm, data, len );
+ if ( builtin_finish( &fsm ) > 0 )
+ printf("ACCEPT\n");
+ else
+ printf("FAIL\n");
+ return 0;
+}
+
+#define DLEN 258
+unsigned int data[DLEN] = {
+ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
+ 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
+ 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
+ 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
+ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
+ 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141,
+ 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156,
+ 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171,
+ 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186,
+ 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201,
+ 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216,
+ 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246,
+ 247, 248, 249, 250, 251, 252, 253, 254, 255, 256
+};
+
+int main()
+{
+ test( data, DLEN );
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+any
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+space
+any
+ascii
+extend
+cntrl
+space
+any
+ascii
+extend
+cntrl
+space
+any
+ascii
+extend
+cntrl
+space
+any
+ascii
+extend
+cntrl
+space
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+cntrl
+any
+ascii
+extend
+print
+space
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+digit
+alnum
+graph
+print
+xdigit
+any
+ascii
+extend
+digit
+alnum
+graph
+print
+xdigit
+any
+ascii
+extend
+digit
+alnum
+graph
+print
+xdigit
+any
+ascii
+extend
+digit
+alnum
+graph
+print
+xdigit
+any
+ascii
+extend
+digit
+alnum
+graph
+print
+xdigit
+any
+ascii
+extend
+digit
+alnum
+graph
+print
+xdigit
+any
+ascii
+extend
+digit
+alnum
+graph
+print
+xdigit
+any
+ascii
+extend
+digit
+alnum
+graph
+print
+xdigit
+any
+ascii
+extend
+digit
+alnum
+graph
+print
+xdigit
+any
+ascii
+extend
+digit
+alnum
+graph
+print
+xdigit
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+xdigit
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+xdigit
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+xdigit
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+xdigit
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+xdigit
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+xdigit
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+upper
+graph
+print
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+xdigit
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+xdigit
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+xdigit
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+xdigit
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+xdigit
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+xdigit
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+alpha
+alnum
+lower
+graph
+print
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+graph
+print
+punct
+any
+ascii
+extend
+cntrl
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+extend
+any
+ACCEPT
+#endif
diff --git a/test/call1.rl b/test/call1.rl
new file mode 100644
index 0000000..24a362f
--- /dev/null
+++ b/test/call1.rl
@@ -0,0 +1,103 @@
+/*
+ * @LANG: c
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+int num = 0;
+
+struct test
+{
+ int cs, top, stack[32];
+};
+
+%%{
+ machine test;
+ access fsm->;
+
+ action check_num {
+ if ( num & 1 )
+ fcall *fentry(odd);
+ else
+ fcall even;
+ }
+
+ # Test call and return functionality.
+ even := 'even' any @{fhold; fret;};
+ odd := 'odd' any @{fhold; fret;};
+ num = [0-9]+ ${ num = num * 10 + (fc - '0'); };
+ even_odd = num ' ' @check_num "\n";
+
+ # Test calls in out actions.
+ fail := !(any*);
+ out_acts = 'OA ok\n' |
+ 'OA error1\n' |
+ 'OA error2\n';
+
+ main := even_odd | out_acts;
+}%%
+
+%% write data;
+
+void test_init( struct test *fsm )
+{
+ num = 0;
+ %% write init;
+}
+
+void test_execute( struct test *fsm, const char *data, int len )
+{
+ const char *p = data;
+ const char *pe = data+len;
+
+ %% write exec;
+}
+
+int test_finish( struct test *fsm )
+{
+ %% write eof;
+
+ if ( fsm->cs == test_error )
+ return -1;
+ if ( fsm->cs >= test_first_final )
+ return 1;
+ return 0;
+}
+
+#define BUFSIZE 1024
+
+void test( char *buf )
+{
+ struct test test;
+ test_init( &test );
+ test_execute( &test, buf, strlen(buf) );
+ if ( test_finish( &test ) > 0 )
+ printf( "ACCEPT\n" );
+ else
+ printf( "FAIL\n" );
+}
+
+int main()
+{
+ test( "78 even\n" );
+ test( "89 odd\n" );
+ test( "1 even\n" );
+ test( "0 odd\n" );
+ test( "OA ok\n" );
+ test( "OA error1\n" );
+ test( "OA error2\n" );
+
+ return 0;
+}
+
+
+#ifdef _____OUTPUT_____
+ACCEPT
+ACCEPT
+FAIL
+FAIL
+ACCEPT
+ACCEPT
+ACCEPT
+#endif
diff --git a/test/call2.rl b/test/call2.rl
new file mode 100644
index 0000000..c447496
--- /dev/null
+++ b/test/call2.rl
@@ -0,0 +1,118 @@
+/*
+ * @LANG: c++
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+int num = 0;
+
+struct CallTest
+{
+ int cs, top, stack[32];
+
+ // Initialize the machine. Resets the global num and runs the code
+ // generated by "write init". Returns nothing; the machine state that
+ // finish() later inspects is kept in the cs member.
+ void init( );
+
+ // Execute the machine on a block of len bytes starting at data.
+ // Returns nothing: call finish() afterwards to learn whether the
+ // machine ended in the error state, a non-accepting state or an
+ // accepting state.
+ void execute( const char *data, int len );
+
+ // Indicate that there is no more data. Returns -1 if the machine finishes
+ // in the error state and does not accept, 0 if the machine finishes
+ // in any other non-accepting state and 1 if the machine finishes in an
+ // accepting state.
+ int finish( );
+};
+
+%%{
+ machine CallTest;
+
+ action check_num {
+ if ( num & 1 )
+ fcall *fentry(odd);
+ else
+ fcall even;
+ }
+
+ # Test call and return functionality.
+ even := 'even' any @{fhold; fret;};
+ odd := 'odd' any @{fhold; fret;};
+ num = [0-9]+ ${ num = num * 10 + (fc - '0'); };
+ even_odd = num ' ' @check_num "\n";
+
+ # Test calls in out actions.
+ fail := !(any*);
+ out_acts = 'OA ok\n' |
+ 'OA error1\n' |
+ 'OA error2\n';
+
+ main := even_odd | out_acts;
+}%%
+
+%% write data;
+
+void CallTest::init( )
+{
+ num = 0;
+ %% write init;
+}
+
+void CallTest::execute( const char *data, int len )
+{
+ const char *p = data;
+ const char *pe = data+len;
+
+ %% write exec;
+}
+
+int CallTest::finish( )
+{
+ %% write eof;
+
+ if ( this->cs == CallTest_error )
+ return -1;
+ if ( this->cs >= CallTest_first_final )
+ return 1;
+ return 0;
+}
+
+#define BUFSIZE 1024
+
+// Run one CallTest over buf; print ACCEPT if it ends accepting, else FAIL.
+void test( const char *buf )
+{
+ CallTest test;
+ test.init();
+ test.execute( buf, strlen(buf) );
+ if ( test.finish() > 0 )
+ printf( "ACCEPT\n" );
+ else
+ printf( "FAIL\n" );
+}
+
+int main()
+{
+ test( "78 even\n" );
+ test( "89 odd\n" );
+ test( "1 even\n" );
+ test( "0 odd\n" );
+ test( "OA ok\n" );
+ test( "OA error1\n" );
+ test( "OA error2\n" );
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+ACCEPT
+ACCEPT
+FAIL
+FAIL
+ACCEPT
+ACCEPT
+ACCEPT
+#endif
diff --git a/test/call3.rl b/test/call3.rl
new file mode 100644
index 0000000..c253b24
--- /dev/null
+++ b/test/call3.rl
@@ -0,0 +1,123 @@
+/*
+ * @LANG: obj-c
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <objc/Object.h>
+
+
+int num = 0;
+
+@interface CallTest : Object
+{
+@public
+ /* State machine operation data. */
+ int cs, top, stack[32];
+};
+
+// Initialize the machine. Resets the global num and runs the code
+// generated by "write init". Returns nothing; the machine state that
+// finish later inspects is kept in the cs instance variable.
+- (void) initFsm;
+
+// Execute the machine on a block of len bytes starting at data.
+// Returns nothing: send finish afterwards to learn whether the
+// machine ended in the error state, a non-accepting state or an
+// accepting state.
+- (void) executeWithData:(const char *)data len:(int)len;
+
+// Indicate that there is no more data. Returns -1 if the machine finishes
+// in the error state and does not accept, 0 if the machine finishes
+// in any other non-accepting state and 1 if the machine finishes in an
+// accepting state.
+- (int) finish;
+
+@end
+
+@implementation CallTest
+
+%%{
+ machine CallTest;
+
+ action check_num {
+ if ( num & 1 )
+ fcall odd;
+ else
+ fcall even;
+ }
+
+ # Test call and return functionality.
+ even := 'even' any @{fhold; fret;};
+ odd := 'odd' any @{fhold; fret;};
+ num = [0-9]+ ${ num = num * 10 + (fc - '0'); };
+ even_odd = num ' ' @check_num "\n";
+
+ # Test calls in out actions.
+ fail := !(any*);
+ out_acts = 'OA ok\n' |
+ 'OA error1\n' |
+ 'OA error2\n';
+
+ main := even_odd | out_acts;
+}%%
+
+%% write data;
+
+- (void) initFsm;
+{
+ num = 0;
+ %% write init;
+}
+
+- (void) executeWithData:(const char *)data len:(int)len;
+{
+ const char *p = data;
+ const char *pe = data + len;
+ %% write exec;
+}
+
+- (int) finish;
+{
+ %% write eof;
+ if ( cs == CallTest_error )
+ return -1;
+ return ( cs >= CallTest_first_final ) ? 1 : 0;
+}
+
+@end
+
+#define BUFSIZE 1024
+
+void test( char *buf )
+{
+ CallTest *test = [[CallTest alloc] init];
+ [test initFsm];
+ [test executeWithData:buf len:strlen(buf)];
+ if ( [test finish] > 0 )
+ printf( "ACCEPT\n" );
+ else
+ printf( "FAIL\n" );
+}
+
+int main()
+{
+ test( "78 even\n" );
+ test( "89 odd\n" );
+ test( "1 even\n" );
+ test( "0 odd\n" );
+ test( "OA ok\n" );
+ test( "OA error1\n" );
+ test( "OA error2\n" );
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+ACCEPT
+ACCEPT
+FAIL
+FAIL
+ACCEPT
+ACCEPT
+ACCEPT
+#endif
diff --git a/test/clang1.rl b/test/clang1.rl
new file mode 100644
index 0000000..ade8f9a
--- /dev/null
+++ b/test/clang1.rl
@@ -0,0 +1,283 @@
+/*
+ * @LANG: c
+ * A mini C-like language scanner.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#define IDENT_BUFLEN 256
+
+%%{
+ machine clang;
+
+ # Append the current character to identBuf; dropped once IDENT_BUFLEN is hit.
+ action bufChar {
+ if ( identLen < IDENT_BUFLEN ) {
+ identBuf[identLen] = fc;
+ identLen += 1;
+ }
+ }
+
+ # Empty the buffer by resetting its length.
+ action clearBuf {
+ identLen = 0;
+ }
+
+ # Token dumpers: each NUL-terminates identBuf and prints it with curLine.
+ action ident {
+ identBuf[identLen] = 0;
+ printf("ident(%i): %s\n", curLine, identBuf);
+ }
+ action literal {
+ identBuf[identLen] = 0;
+ printf("literal(%i): %s\n", curLine, identBuf);
+ }
+ action float {
+ identBuf[identLen] = 0;
+ printf("float(%i): %s\n", curLine, identBuf);
+ }
+ action int {
+ identBuf[identLen] = 0;
+ printf("int(%i): %s\n", curLine, identBuf);
+ }
+ action hex {
+ identBuf[identLen] = 0;
+ printf("hex(%i): 0x%s\n", curLine, identBuf);
+ }
+ action symbol {
+ identBuf[identLen] = 0;
+ printf("symbol(%i): %s\n", curLine, identBuf);
+ }
+
+ # Alphanumeric characters or underscore.
+ alnumu = alnum | '_';
+
+ # Alphabetic characters or underscore.
+ alphau = alpha | '_';
+
+ # Symbols. Upon entering clear the buffer. On all transitions
+ # buffer a character. Upon leaving dump the symbol.
+ symbol = ( punct - [_'"] ) >clearBuf $bufChar %symbol;
+
+ # Identifier. Upon entering clear the buffer. On all transitions
+ # buffer a character. Upon leaving, dump the identifier.
+ ident = (alphau . alnumu*) >clearBuf $bufChar %ident;
+
+ # Match single characters inside literal strings. Or match
+ # an escape sequence. Buffers the character matched.
+ sliteralChar =
+ ( extend - ['\\] ) @bufChar |
+ ( '\\' . extend @bufChar );
+ dliteralChar =
+ ( extend - ["\\] ) @bufChar |
+ ( '\\' . extend @bufChar );
+
+ # Single quote and double quote literals. At the start clear
+ # the buffer. Upon leaving dump the literal.
+ sliteral = ('\'' @clearBuf . sliteralChar* . '\'' ) %literal;
+ dliteral = ('"' @clearBuf . dliteralChar* . '"' ) %literal;
+ literal = sliteral | dliteral;
+
+ # Whitespace is anything outside 0x21..0x7e: blanks, newlines, control codes.
+ whitespace = any - 0x21..0x7e;
+
+ # Describe both C style comments and C++ style comments. The
+ # priority bump on the terminator of the comments brings us
+ # out of the extend* which matches everything.
+ ccComment = '//' . extend* $0 . '\n' @1;
+ cComment = '/*' . extend* $0 . '*/' @1;
+
+ # Match an integer. We don't bother clearing the buf or filling it.
+ # The float machine overlaps with int and it will do it.
+ int = digit+ %int;
+
+ # Match a float. Upon entering the machine clear the buf, buffer
+ # characters on every trans and dump the float upon leaving.
+ float = ( digit+ . '.' . digit+ ) >clearBuf $bufChar %float;
+
+ # Match a hex. Upon entering the hex part, clear the buf, buffer characters
+ # on every trans and dump the hex on leaving transitions.
+ hex = '0x' . xdigit+ >clearBuf $bufChar %hex;
+
+ # Or together all the language elements.
+ fin = ( ccComment |
+ cComment |
+ symbol |
+ ident |
+ literal |
+ whitespace |
+ int |
+ float |
+ hex );
+
+ # Star the language elements. It is critical in this type of application
+ # that we decrease the priority of out transitions before doing so. This
+ # is so that when we see 'aa' we stay in the fin machine to match an ident
+ # of length two and not wrap around to the front to match two idents of
+ # length one.
+ clang_main = ( fin $1 %0 )*;
+
+ # This machine matches everything, taking note of newlines.
+ newline = ( any | '\n' @{ curLine += 1; } )*;
+
+ # The final fsm is the lexer intersected with the newline machine which
+ # will count lines for us. Since the newline machine accepts everything,
+ # the set of strings accepted is governed by the clang_main machine, onto
+ # which the newline machine overlays line counting.
+ main := clang_main & newline;
+}%%
+
+#include <stdio.h>
+
+%% write data noerror;
+
+
+char data[] =
+ "/*\n"
+ " * Copyright\n"
+ " */\n"
+ "\n"
+ "/* Aapl.\n"
+ " */\n"
+ " \n"
+ "#define _AAPL_RESIZE_H\n"
+ "\n"
+ "#include <assert.h>\n"
+ "\n"
+ "#ifdef AAPL_NAMESPACE\n"
+ "namespace Aapl {\n"
+ "#endif\n"
+ "#define LIN_DEFAULT_STEP 256\n"
+ "#define EXPN_UP( existing, needed ) \\\n"
+ " need > eng ? (ned<<1) : eing\n"
+ " \n"
+ "\n"
+ "/*@}*/\n"
+ "#undef EXPN_UP\n"
+ "#ifdef AAPL_NAMESPACE\n"
+ "#endif /* _AAPL_RESIZE_H */\n";
+
+void test( char *buf )
+{
+ int len = strlen( buf );
+ char *p = buf, *pe = buf + len;
+ char identBuf[IDENT_BUFLEN+1];
+ int identLen;
+ int curLine;
+ int cs;
+
+ identLen = 0;
+ curLine = 1;
+
+ %% write init;
+ %% write exec;
+ %% write eof;
+
+ if ( cs >= clang_first_final )
+ printf("ACCEPT\n");
+ else
+ printf("FAIL\n");
+}
+
+int main()
+{
+ test(
+ "999 0xaAFF99 99.99 /*\n"
+ "*/ 'lksdj' //\n"
+ "\"\n"
+ "\n"
+ "literal\n"
+ "\n"
+ "\n"
+ "\"0x00aba foobardd.ddsf 0x0.9\n" );
+ test(
+ "wordwithnum00asdf\n"
+ "000wordfollowsnum,makes new symbol\n"
+ "\n"
+ "finishing early /* unfinished ...\n" );
+ test( data );
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+int(1): 999
+hex(1): 0xaAFF99
+float(1): 99.99
+literal(2): lksdj
+literal(8):
+
+literal
+
+
+
+hex(8): 0x00aba
+ident(8): foobardd
+symbol(8): .
+ident(8): ddsf
+hex(8): 0x0
+symbol(8): .
+int(8): 9
+ACCEPT
+ident(1): wordwithnum00asdf
+int(2): 000
+ident(2): wordfollowsnum
+symbol(2): ,
+ident(2): makes
+ident(2): new
+ident(2): symbol
+ident(4): finishing
+ident(4): early
+FAIL
+symbol(8): #
+ident(8): define
+ident(8): _AAPL_RESIZE_H
+symbol(10): #
+ident(10): include
+symbol(10): <
+ident(10): assert
+symbol(10): .
+ident(10): h
+symbol(10): >
+symbol(12): #
+ident(12): ifdef
+ident(12): AAPL_NAMESPACE
+ident(13): namespace
+ident(13): Aapl
+symbol(13): {
+symbol(14): #
+ident(14): endif
+symbol(15): #
+ident(15): define
+ident(15): LIN_DEFAULT_STEP
+int(15): 256
+symbol(16): #
+ident(16): define
+ident(16): EXPN_UP
+symbol(16): (
+ident(16): existing
+symbol(16): ,
+ident(16): needed
+symbol(16): )
+symbol(16): \
+ident(17): need
+symbol(17): >
+ident(17): eng
+symbol(17): ?
+symbol(17): (
+ident(17): ned
+symbol(17): <
+symbol(17): <
+int(17): 1
+symbol(17): )
+symbol(17): :
+ident(17): eing
+symbol(21): #
+ident(21): undef
+ident(21): EXPN_UP
+symbol(22): #
+ident(22): ifdef
+ident(22): AAPL_NAMESPACE
+symbol(23): #
+ident(23): endif
+ACCEPT
+#endif
diff --git a/test/clang2.rl b/test/clang2.rl
new file mode 100644
index 0000000..fcb6ba8
--- /dev/null
+++ b/test/clang2.rl
@@ -0,0 +1,324 @@
+/*
+ * @LANG: obj-c
+ * A mini C-like language scanner.
+ */
+
+#include <stdio.h>
+#include <objc/Object.h>
+#include <string.h>
+
+#define IDENT_BUFLEN 256
+
+@interface Clang : Object
+{
+@public
+ /* State machine operation data. */
+ int cs;
+
+ /* Parsing data. */
+ char identBuf[IDENT_BUFLEN+1];
+ int identLen;
+ int curLine;
+};
+
+- (void) initFsm;
+- (void) executeWithData:(const char *)data len:(int)len;
+- (int) finish;
+
+@end
+
+%%{
+ machine Clang;
+
+ # Append the current character to identBuf; dropped once IDENT_BUFLEN is hit.
+ action bufChar {
+ if ( identLen < IDENT_BUFLEN ) {
+ identBuf[identLen] = fc;
+ identLen += 1;
+ }
+ }
+
+ # Empty the buffer by resetting its length.
+ action clearBuf {
+ identLen = 0;
+ }
+
+ # Token dumpers: each NUL-terminates identBuf and prints it with curLine.
+ action ident {
+ identBuf[identLen] = 0;
+ printf("ident(%i): %s\n", curLine, identBuf);
+ }
+ action literal {
+ identBuf[identLen] = 0;
+ printf("literal(%i): %s\n", curLine, identBuf);
+ }
+ action float {
+ identBuf[identLen] = 0;
+ printf("float(%i): %s\n", curLine, identBuf);
+ }
+ action int {
+ identBuf[identLen] = 0;
+ printf("int(%i): %s\n", curLine, identBuf);
+ }
+ action hex {
+ identBuf[identLen] = 0;
+ printf("hex(%i): 0x%s\n", curLine, identBuf);
+ }
+ action symbol {
+ identBuf[identLen] = 0;
+ printf("symbol(%i): %s\n", curLine, identBuf);
+ }
+
+ # Alphanumeric characters or underscore.
+ alnumu = alnum | '_';
+
+ # Alphabetic characters or underscore.
+ alphau = alpha | '_';
+
+ # Symbols. Upon entering clear the buffer. On all transitions
+ # buffer a character. Upon leaving dump the symbol.
+ symbol = ( punct - [_'"] ) >clearBuf $bufChar %symbol;
+
+ # Identifier. Upon entering clear the buffer. On all transitions
+ # buffer a character. Upon leaving, dump the identifier.
+ ident = (alphau . alnumu*) >clearBuf $bufChar %ident;
+
+ # Match single characters inside literal strings. Or match
+ # an escape sequence. Buffers the character matched.
+ sliteralChar =
+ ( extend - ['\\] ) @bufChar |
+ ( '\\' . extend @bufChar );
+ dliteralChar =
+ ( extend - ["\\] ) @bufChar |
+ ( '\\' . extend @bufChar );
+
+ # Single quote and double quote literals. At the start clear
+ # the buffer. Upon leaving dump the literal.
+ sliteral = ('\'' @clearBuf . sliteralChar* . '\'' ) %literal;
+ dliteral = ('"' @clearBuf . dliteralChar* . '"' ) %literal;
+ literal = sliteral | dliteral;
+
+ # Whitespace is anything outside 0x21..0x7e: blanks, newlines, control codes.
+ whitespace = any - 0x21..0x7e;
+
+ # Describe both C style comments and C++ style comments. The
+ # priority bump on the terminator of the comments brings us
+ # out of the extend* which matches everything.
+ ccComment = '//' . extend* $0 . '\n' @1;
+ cComment = '/*' . extend* $0 . '*/' @1;
+
+ # Match an integer. We don't bother clearing the buf or filling it.
+ # The float machine overlaps with int and it will do it.
+ int = digit+ %int;
+
+ # Match a float. Upon entering the machine clear the buf, buffer
+ # characters on every trans and dump the float upon leaving.
+ float = ( digit+ . '.' . digit+ ) >clearBuf $bufChar %float;
+
+ # Match a hex. Upon entering the hex part, clear the buf, buffer characters
+ # on every trans and dump the hex on leaving transitions.
+ hex = '0x' . xdigit+ >clearBuf $bufChar %hex;
+
+ # Or together all the language elements.
+ fin = ( ccComment |
+ cComment |
+ symbol |
+ ident |
+ literal |
+ whitespace |
+ int |
+ float |
+ hex );
+
+ # Star the language elements. It is critical in this type of application
+ # that we decrease the priority of out transitions before doing so. This
+ # is so that when we see 'aa' we stay in the fin machine to match an ident
+ # of length two and not wrap around to the front to match two idents of
+ # length one.
+ clang_main = ( fin $1 %0 )*;
+
+ # This machine matches everything, taking note of newlines.
+ newline = ( any | '\n' @{ curLine += 1; } )*;
+
+ # The final fsm is the lexer intersected with the newline machine which
+ # will count lines for us. Since the newline machine accepts everything,
+ # the set of strings accepted is governed by the clang_main machine, onto
+ # which the newline machine overlays line counting.
+ main := clang_main & newline;
+}%%
+
+@implementation Clang
+
+%% write data;
+
+- (void) initFsm;
+{
+ identLen = 0;
+ curLine = 1;
+ %% write init;
+}
+
+- (void) executeWithData:(const char *)data len:(int)len;
+{
+ const char *p = data;
+ const char *pe = data + len;
+
+ %% write exec;
+}
+
+- (int) finish;
+{
+ %% write eof;
+ if ( cs == Clang_error )
+ return -1;
+ if ( cs >= Clang_first_final )
+ return 1;
+ return 0;
+}
+
+@end
+
+#define BUFSIZE 2048
+
+Clang *fsm;
+char buf[BUFSIZE];
+
+void test( char *buf )
+{
+ int len = strlen(buf);
+ fsm = [[Clang alloc] init];
+ [fsm initFsm];
+ [fsm executeWithData:buf len:len];
+ if ( [fsm finish] > 0 )
+ printf("ACCEPT\n");
+ else
+ printf("FAIL\n");
+}
+
+int main()
+{
+ test(
+ "999 0xaAFF99 99.99 /*\n"
+ "*/ 'lksdj' //\n"
+ "\"\n"
+ "\n"
+ "literal\n"
+ "\n"
+ "\n"
+ "\"0x00aba foobardd.ddsf 0x0.9\n" );
+
+ test(
+ "wordwithnum00asdf\n"
+ "000wordfollowsnum,makes new symbol\n"
+ "\n"
+ "finishing early /* unfinished ...\n" );
+
+ test(
+ "/*\n"
+ " * Copyright\n"
+ " */\n"
+ "\n"
+ "/* Aapl.\n"
+ " */\n"
+ " \n"
+ "#define _AAPL_RESIZE_H\n"
+ "\n"
+ "#include <assert.h>\n"
+ "\n"
+ "#ifdef AAPL_NAMESPACE\n"
+ "namespace Aapl {\n"
+ "#endif\n"
+ "#define LIN_DEFAULT_STEP 256\n"
+ "#define EXPN_UP( existing, needed ) \\\n"
+ " need > eng ? (ned<<1) : eing\n"
+ " \n"
+ "\n"
+ "/*@}*/\n"
+ "#undef EXPN_UP\n"
+ "#ifdef AAPL_NAMESPACE\n"
+ "#endif /* _AAPL_RESIZE_H */\n" );
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+int(1): 999
+hex(1): 0xaAFF99
+float(1): 99.99
+literal(2): lksdj
+literal(8):
+
+literal
+
+
+
+hex(8): 0x00aba
+ident(8): foobardd
+symbol(8): .
+ident(8): ddsf
+hex(8): 0x0
+symbol(8): .
+int(8): 9
+ACCEPT
+ident(1): wordwithnum00asdf
+int(2): 000
+ident(2): wordfollowsnum
+symbol(2): ,
+ident(2): makes
+ident(2): new
+ident(2): symbol
+ident(4): finishing
+ident(4): early
+FAIL
+symbol(8): #
+ident(8): define
+ident(8): _AAPL_RESIZE_H
+symbol(10): #
+ident(10): include
+symbol(10): <
+ident(10): assert
+symbol(10): .
+ident(10): h
+symbol(10): >
+symbol(12): #
+ident(12): ifdef
+ident(12): AAPL_NAMESPACE
+ident(13): namespace
+ident(13): Aapl
+symbol(13): {
+symbol(14): #
+ident(14): endif
+symbol(15): #
+ident(15): define
+ident(15): LIN_DEFAULT_STEP
+int(15): 256
+symbol(16): #
+ident(16): define
+ident(16): EXPN_UP
+symbol(16): (
+ident(16): existing
+symbol(16): ,
+ident(16): needed
+symbol(16): )
+symbol(16): \
+ident(17): need
+symbol(17): >
+ident(17): eng
+symbol(17): ?
+symbol(17): (
+ident(17): ned
+symbol(17): <
+symbol(17): <
+int(17): 1
+symbol(17): )
+symbol(17): :
+ident(17): eing
+symbol(21): #
+ident(21): undef
+ident(21): EXPN_UP
+symbol(22): #
+ident(22): ifdef
+ident(22): AAPL_NAMESPACE
+symbol(23): #
+ident(23): endif
+ACCEPT
+#endif
diff --git a/test/clang3.rl b/test/clang3.rl
new file mode 100644
index 0000000..d950eb3
--- /dev/null
+++ b/test/clang3.rl
@@ -0,0 +1,321 @@
+/*
+ * @LANG: d
+ * A mini C-like language scanner.
+ */
+
+module clang;
+
+import std.c.stdio;
+
+char[] string(char c)
+{
+ char[] result = new char[2];
+ result[0] = c;
+ result[1] = 0;
+ return result[0 .. 1];
+}
+
+class CLang
+{
+ /* Parsing data. */
+ char[] identBuf;
+ int curLine;
+
+ this()
+ {
+ }
+
+ /* State machine operation data. */
+ int cs;
+
+ %%{
+ machine clang;
+
+ # Append the current character to identBuf.
+ action bufChar {
+ identBuf ~= fc;
+ }
+
+ # Empty the buffer by dropping the array.
+ action clearBuf {
+
+ identBuf = null;
+ }
+
+ # Token dumpers: each prints identBuf tagged with its kind and curLine.
+ action ident {
+ printf("ident(%i): %.*s\n", curLine, identBuf);
+ }
+ action literal {
+ printf("literal(%i): %.*s\n", curLine, identBuf);
+ }
+ action float {
+ printf("float(%i): %.*s\n", curLine, identBuf);
+ }
+ action int {
+ printf("int(%i): %.*s\n", curLine, identBuf);
+ }
+ action hex {
+ printf("hex(%i): 0x%.*s\n", curLine, identBuf);
+ }
+ action symbol {
+ printf("symbol(%i): %.*s\n", curLine, identBuf);
+ }
+
+ # Alphanumeric characters or underscore.
+ alnumu = alnum | '_';
+
+ # Alphabetic characters or underscore.
+ alphau = alpha | '_';
+
+ # Symbols. Upon entering clear the buffer. On all transitions
+ # buffer a character. Upon leaving dump the symbol.
+ symbol = ( punct - [_'"] ) >clearBuf $bufChar %symbol;
+
+ # Identifier. Upon entering clear the buffer. On all transitions
+ # buffer a character. Upon leaving, dump the identifier.
+ ident = (alphau . alnumu*) >clearBuf $bufChar %ident;
+
+ # Match single characters inside literal strings. Or match
+ # an escape sequence. Buffers the character matched.
+ sliteralChar =
+ ( extend - ['\\] ) @bufChar |
+ ( '\\' . extend @bufChar );
+ dliteralChar =
+ ( extend - ["\\] ) @bufChar |
+ ( '\\' . extend @bufChar );
+
+ # Single quote and double quote literals. At the start clear
+ # the buffer. Upon leaving dump the literal.
+ sliteral = ('\'' @clearBuf . sliteralChar* . '\'' ) %literal;
+ dliteral = ('"' @clearBuf . dliteralChar* . '"' ) %literal;
+ literal = sliteral | dliteral;
+
+ # Whitespace is anything outside 0x21..0x7e: blanks, newlines, control codes.
+ whitespace = any - 0x21..0x7e;
+
+ # Describe both C style comments and C++ style comments. The
+ # priority bump on the terminator of the comments brings us
+ # out of the extend* which matches everything.
+ ccComment = '//' . extend* $0 . '\n' @1;
+ cComment = '/*' . extend* $0 . '*/' @1;
+
+ # Match an integer. We don't bother clearing the buf or filling it.
+ # The float machine overlaps with int and it will do it.
+ int = digit+ %int;
+
+ # Match a float. Upon entering the machine clear the buf, buffer
+ # characters on every trans and dump the float upon leaving.
+ float = ( digit+ . '.' . digit+ ) >clearBuf $bufChar %float;
+
+ # Match a hex. Upon entering the hex part, clear the buf, buffer characters
+ # on every trans and dump the hex on leaving transitions.
+ hex = '0x' . xdigit+ >clearBuf $bufChar %hex;
+
+ # Or together all the language elements.
+ fin = ( ccComment |
+ cComment |
+ symbol |
+ ident |
+ literal |
+ whitespace |
+ int |
+ float |
+ hex );
+
+ # Star the language elements. It is critical in this type of application
+ # that we decrease the priority of out transitions before doing so. This
+ # is so that when we see 'aa' we stay in the fin machine to match an ident
+ # of length two and not wrap around to the front to match two idents of
+ # length one.
+ clang_main = ( fin $1 %0 )*;
+
+ # This machine matches everything, taking note of newlines.
+ newline = ( any | '\n' @{ curLine++; } )*;
+
+ # The final fsm is the lexer intersected with the newline machine which
+ # will count lines for us. Since the newline machine accepts everything,
+ # the set of strings accepted is governed by the clang_main machine, onto
+ # which the newline machine overlays line counting.
+ main := clang_main & newline;
+ }%%
+
+ %% write data noprefix;
+
+ // Initialize the machine. Resets curLine to 1 and runs the code
+ // generated by "write init". Returns nothing; the machine state that
+ // finish() later inspects is kept in the cs member.
+ void init( )
+ {
+ curLine = 1;
+ %% write init;
+ }
+
+ // Execute the machine on a block of _len bytes starting at _data.
+ // Returns nothing: call finish() afterwards to learn whether the
+ // machine ended in the error state, a non-accepting state or an
+ // accepting state.
+ void execute( char* _data, int _len )
+ {
+ char *p = _data;
+ char *pe = _data + _len;
+ %% write exec;
+ }
+
+ // Indicate that there is no more data. Returns -1 if the machine finishes
+ // in the error state and does not accept, 0 if the machine finishes
+ // in any other non-accepting state and 1 if the machine finishes in an
+ // accepting state.
+ int finish( )
+ {
+ %% write eof;
+ if ( cs == error )
+ return -1;
+ if ( cs >= first_final )
+ return 1;
+ return 0;
+ }
+}
+
+static const int BUFSIZE = 1024;
+
+// Scan buf with a fresh CLang and print ACCEPT when finish() reports an
+// accepting state, FAIL otherwise. Returns nothing (the function is void).
+void test( char buf[] )
+{
+ CLang scanner = new CLang();
+ scanner.init();
+ scanner.execute( buf.ptr, buf.length );
+ if ( scanner.finish() > 0 )
+ printf("ACCEPT\n");
+ else
+ printf("FAIL\n");
+}
+
+int main()
+{
+ test(
+ "999 0xaAFF99 99.99 /*\n"
+ "*/ 'lksdj' //\n"
+ "\"\n"
+ "\n"
+ "literal\n"
+ "\n"
+ "\n"
+ "\"0x00aba foobardd.ddsf 0x0.9\n" );
+
+ test(
+ "wordwithnum00asdf\n"
+ "000wordfollowsnum,makes new symbol\n"
+ "\n"
+ "finishing early /* unfinished ...\n" );
+
+ test(
+ "/*\n"
+ " * Copyright\n"
+ " */\n"
+ "\n"
+ "/* Aapl.\n"
+ " */\n"
+ " \n"
+ "#define _AAPL_RESIZE_H\n"
+ "\n"
+ "#include <assert.h>\n"
+ "\n"
+ "#ifdef AAPL_NAMESPACE\n"
+ "namespace Aapl {\n"
+ "#endif\n"
+ "#define LIN_DEFAULT_STEP 256\n"
+ "#define EXPN_UP( existing, needed ) \\\n"
+ " need > eng ? (ned<<1) : eing\n"
+ " \n"
+ "\n"
+ "/*@}*/\n"
+ "#undef EXPN_UP\n"
+ "#ifdef AAPL_NAMESPACE\n"
+ "#endif /* _AAPL_RESIZE_H */\n" );
+ return 0;
+}
+
+/+ _____OUTPUT_____
+int(1): 999
+hex(1): 0xaAFF99
+float(1): 99.99
+literal(2): lksdj
+literal(8):
+
+literal
+
+
+
+hex(8): 0x00aba
+ident(8): foobardd
+symbol(8): .
+ident(8): ddsf
+hex(8): 0x0
+symbol(8): .
+int(8): 9
+ACCEPT
+ident(1): wordwithnum00asdf
+int(2): 000
+ident(2): wordfollowsnum
+symbol(2): ,
+ident(2): makes
+ident(2): new
+ident(2): symbol
+ident(4): finishing
+ident(4): early
+FAIL
+symbol(8): #
+ident(8): define
+ident(8): _AAPL_RESIZE_H
+symbol(10): #
+ident(10): include
+symbol(10): <
+ident(10): assert
+symbol(10): .
+ident(10): h
+symbol(10): >
+symbol(12): #
+ident(12): ifdef
+ident(12): AAPL_NAMESPACE
+ident(13): namespace
+ident(13): Aapl
+symbol(13): {
+symbol(14): #
+ident(14): endif
+symbol(15): #
+ident(15): define
+ident(15): LIN_DEFAULT_STEP
+int(15): 256
+symbol(16): #
+ident(16): define
+ident(16): EXPN_UP
+symbol(16): (
+ident(16): existing
+symbol(16): ,
+ident(16): needed
+symbol(16): )
+symbol(16): \
+ident(17): need
+symbol(17): >
+ident(17): eng
+symbol(17): ?
+symbol(17): (
+ident(17): ned
+symbol(17): <
+symbol(17): <
+int(17): 1
+symbol(17): )
+symbol(17): :
+ident(17): eing
+symbol(21): #
+ident(21): undef
+ident(21): EXPN_UP
+symbol(22): #
+ident(22): ifdef
+ident(22): AAPL_NAMESPACE
+symbol(23): #
+ident(23): endif
+ACCEPT
+++++++++++++++++/
diff --git a/test/cond1.rl b/test/cond1.rl
new file mode 100644
index 0000000..620ea5e
--- /dev/null
+++ b/test/cond1.rl
@@ -0,0 +1,68 @@
+/*
+ * @LANG: indep
+ */
+bool i;
+bool j;
+bool k;
+%%
+
+%%{
+ machine foo;
+
+ action c1 {i}
+ action c2 {j}
+ action c3 {k}
+ action one { prints " one\n";}
+ action two { prints " two\n";}
+ action three { prints " three\n";}
+
+ action seti { if ( fc == '0' ) i = false; else i = true; }
+ action setj { if ( fc == '0' ) j = false; else j = true; }
+ action setk { if ( fc == '0' ) k = false; else k = true; }
+
+ action break {fbreak;}
+
+ one = 'a' 'b' when c1 'c' @one;
+ two = 'a'* 'b' when c2 'c' @two;
+ three = 'a'+ 'b' when c3 'c' @three;
+
+ main :=
+ [01] @seti
+ [01] @setj
+ [01] @setk
+ ( one | two | three ) '\n' @break;
+
+}%%
+
+/* _____INPUT_____
+"000abc\n"
+"100abc\n"
+"010abc\n"
+"110abc\n"
+"001abc\n"
+"101abc\n"
+"011abc\n"
+"111abc\n"
+_____INPUT_____ */
+/* _____OUTPUT_____
+FAIL
+ one
+ACCEPT
+ two
+ACCEPT
+ one
+ two
+ACCEPT
+ three
+ACCEPT
+ one
+ three
+ACCEPT
+ two
+ three
+ACCEPT
+ one
+ two
+ three
+ACCEPT
+_____OUTPUT_____ */
diff --git a/test/cond2.rl b/test/cond2.rl
new file mode 100644
index 0000000..7593a3f
--- /dev/null
+++ b/test/cond2.rl
@@ -0,0 +1,91 @@
+/*
+ * @LANG: c++
+ */
+
+#include <iostream>
+#include <string.h>
+using std::cout;
+using std::endl;
+
+%%{
+ machine foo;
+
+ action c1 {i}
+ action c2 {j}
+
+ action one { cout << " one" << endl;}
+ action two { cout << " two" << endl;}
+
+ main := (
+ [a-z] |
+ ('\n' when c1 @one)
+ )*
+ ('\n' when c2 @two);
+}%%
+
+%% write data noerror;
+
+void test( int i, int j, char *str )
+{
+ int cs = foo_start;
+ char *p = str;
+ char *pe = str + strlen( str );
+
+ cout << "run:" << endl;
+ %% write exec;
+ if ( cs >= foo_first_final )
+ cout << " success" << endl;
+ else
+ cout << " failure" << endl;
+ cout << endl;
+}
+
+int main()
+{
+ test( 0, 0, "hi\n\n" );
+ test( 1, 0, "hi\n\n" );
+ test( 0, 1, "hi\n" );
+ test( 0, 1, "hi\n\n" );
+ test( 1, 1, "hi\n" );
+ test( 1, 1, "hi\n\n" );
+ test( 1, 1, "hi\n\nx" );
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+run:
+ failure
+
+run:
+ one
+ one
+ failure
+
+run:
+ two
+ success
+
+run:
+ two
+ failure
+
+run:
+ one
+ two
+ success
+
+run:
+ one
+ two
+ one
+ two
+ success
+
+run:
+ one
+ two
+ one
+ two
+ failure
+
+#endif
diff --git a/test/cond3.rl b/test/cond3.rl
new file mode 100644
index 0000000..1847727
--- /dev/null
+++ b/test/cond3.rl
@@ -0,0 +1,59 @@
+/*
+ * @LANG: c++
+ */
+
+#include <iostream>
+#include <string.h>
+using std::cout;
+using std::endl;
+
+%%{
+ machine foo;
+
+ action hit_5 {c == 5}
+ action done { cout << " done" << endl; }
+ action inc {c++;}
+
+ # The any* includes '\n' when hit_5 is true, so use guarded concatenation.
+ main := (any @inc)* :> '\n' when hit_5 @done;
+}%%
+
+%% write data noerror;
+
+void test( char *str )
+{
+ int cs = foo_start;
+ int c = 0;
+ char *p = str;
+ char *pe = str + strlen( str );
+
+ cout << "run:" << endl;
+ %% write exec;
+ if ( cs >= foo_first_final )
+ cout << " success" << endl;
+ else
+ cout << " failure" << endl;
+ cout << endl;
+}
+
+int main()
+{
+ test( "12345\n" ); // success
+ test( "\n2345\n" ); // success, first newline ignored
+ test( "1234\n" ); // failure, didn't get 5 chars before newline.
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+run:
+ done
+ success
+
+run:
+ done
+ success
+
+run:
+ failure
+
+#endif
diff --git a/test/cond4.rl b/test/cond4.rl
new file mode 100644
index 0000000..380c5ff
--- /dev/null
+++ b/test/cond4.rl
@@ -0,0 +1,54 @@
+/*
+ * @LANG: c++
+ */
+
+#include <iostream>
+#include <string.h>
+using std::cout;
+using std::endl;
+
+%%{
+ machine foo;
+
+ action c1 {(cout << "c1 ", true)}
+ action c2 {(cout << "c2 ", true)}
+ action c3 {(cout << "c3 ", true)}
+ action c4 {(cout << "c4 ", true)}
+
+ main := (
+ 10 .. 60 when c1 |
+ 20 .. 40 when c2 |
+ 30 .. 50 when c3 |
+ 32 .. 38 when c4 |
+ 0 .. 70 )* ${cout << "char: " << (int)*p << endl;};
+}%%
+
+%% write data noerror nofinal;
+
+void test( char *str )
+{
+ int len = strlen( str );
+ int cs = foo_start;
+ char *p = str, *pe = str+len;
+ %% write exec;
+}
+
+char data[] = { 5, 15, 25, 31, 35, 39, 45, 55, 65, 0 };
+
+int main()
+{
+ test( data );
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+char: 5
+c1 char: 15
+c1 c2 char: 25
+c1 c2 c3 char: 31
+c1 c2 c3 c4 char: 35
+c1 c2 c3 char: 39
+c1 c3 char: 45
+c1 char: 55
+char: 65
+#endif
diff --git a/test/cond5.rl b/test/cond5.rl
new file mode 100644
index 0000000..57e3c85
--- /dev/null
+++ b/test/cond5.rl
@@ -0,0 +1,59 @@
+/*
+ * @LANG: c++
+ */
+
+#include <iostream>
+#include <string.h>
+using std::cout;
+using std::endl;
+
+%%{
+ machine foo;
+ write data noerror;
+}%%
+
+void test( char *str )
+{
+ int cs = foo_start;
+ int c = 0;
+ char *p = str;
+ char *pe = str + strlen( str );
+ char last = '0';
+
+ cout << "run:";
+ %%{
+ action d1 { cout << " d1"; }
+ action see_five { cout << " see_five"; }
+
+ see_five = ([0-9] when{c++ < 5} @d1)* '\n' @see_five;
+
+ action in_sequence { cout << " in_sequence"; }
+ action d2 { last = *p; cout << " d2"; }
+ in_sequence = ( [0-9] when { *p == last+1 } @d2 )* '\n' @in_sequence;
+
+ main := ( see_five | in_sequence ) ${cout << " |";};
+
+ write exec;
+ }%%
+ if ( cs < foo_first_final )
+ cout << " failure";
+ cout << endl;
+}
+
+int main()
+{
+ test( "123456789012\n" ); // fails both
+ test( "123456789\n" ); // fails five
+ test( "1234\n" ); // fails five
+ test( "13245\n" ); // fails sequence
+ test( "12345\n" ); // succeeds in both
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+run: d1 d2 | d1 d2 | d1 d2 | d1 d2 | d1 d2 | d2 | d2 | d2 | d2 | failure
+run: d1 d2 | d1 d2 | d1 d2 | d1 d2 | d1 d2 | d2 | d2 | d2 | d2 | in_sequence |
+run: d1 d2 | d1 d2 | d1 d2 | d1 d2 | see_five in_sequence |
+run: d1 d2 | d1 | d1 | d1 | d1 | see_five |
+run: d1 d2 | d1 d2 | d1 d2 | d1 d2 | d1 d2 | see_five in_sequence |
+#endif
diff --git a/test/cond6.rl b/test/cond6.rl
new file mode 100644
index 0000000..25bf45d
--- /dev/null
+++ b/test/cond6.rl
@@ -0,0 +1,61 @@
+/*
+ * @LANG: c++
+ */
+
+/* Balanced parenthesis with conditions. */
+
+#include <iostream>
+#include <string.h>
+using std::cout;
+using std::endl;
+
+%%{
+ machine cond;
+ write data noerror;
+}%%
+
+void test( char *str )
+{
+ int cs = cond_start, n = 0;
+ char *p = str;
+ char *pe = str + strlen( str );
+
+ %%{
+ comment = '(' @{n=0;}
+ ( '('@{n++;} | ')'@{n--;} | [^()] )*
+ :> ')' when{!n};
+
+ main := ' '* comment ' '* '\n' @{cout << "success";};
+
+ write exec;
+ }%%
+ if ( cs < cond_first_final )
+ cout << "failure";
+ cout << endl;
+}
+
+int main()
+{
+ test( "( ( )\n" );
+ test( "()()\n" );
+ test( "(((\n" );
+ test( "((()\n" );
+ test( "((())\n" );
+ test( "()\n" );
+ test( "((()))\n" );
+ test( "(()())\n" );
+ test( "((())()(((()))))\n" );
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+failure
+failure
+failure
+failure
+failure
+success
+success
+success
+success
+#endif
diff --git a/test/cppscan1.h b/test/cppscan1.h
new file mode 100644
index 0000000..4497cd2
--- /dev/null
+++ b/test/cppscan1.h
@@ -0,0 +1,110 @@
+#ifndef _CPPSCAN1_H
+#define _CPPSCAN1_H
+
+#include <iostream>
+
+using namespace std;
+
+#define BUFSIZE 2048
+
+#define TK_Dlit 192
+#define TK_Slit 193
+#define TK_Float 194
+#define TK_Id 195
+#define TK_NameSep 197
+#define TK_Arrow 211
+#define TK_PlusPlus 212
+#define TK_MinusMinus 213
+#define TK_ArrowStar 214
+#define TK_DotStar 215
+#define TK_ShiftLeft 216
+#define TK_ShiftRight 217
+#define TK_IntegerDecimal 218
+#define TK_IntegerOctal 219
+#define TK_IntegerHex 220
+#define TK_EqualsEquals 223
+#define TK_NotEquals 224
+#define TK_AndAnd 225
+#define TK_OrOr 226
+#define TK_MultAssign 227
+#define TK_DivAssign 228
+#define TK_PercentAssign 229
+#define TK_PlusAssign 230
+#define TK_MinusAssign 231
+#define TK_AmpAssign 232
+#define TK_CaretAssign 233
+#define TK_BarAssign 234
+#define TK_DotDotDot 240
+
+/* A growable character buffer; Scanner below keeps two (tokBuf, nonTokBuf). */
+struct Buffer
+{
+ Buffer() : data(0), allocated(0), length(0) { }
+ Buffer( const Buffer &other ) { // Copy: same capacity, first other.length bytes duplicated.
+ data = (char*)malloc( other.allocated ); // NOTE(review): only <iostream> is included by this header; malloc/memcpy appear to rely on a transitive include.
+ memcpy( data, other.data, other.length );
+ allocated = other.allocated;
+ length = other.length;
+ }
+ ~Buffer() { empty(); }
+
+ void append( char p ) { // Append one character.
+ if ( ++length > allocated )
+ upAllocate( length*2 ); // Out of room: grow to twice the new length.
+ data[length-1] = p;
+ }
+ void append( char *str, int len ) { // Append len bytes starting at str.
+ if ( (length += len) > allocated )
+ upAllocate( length*2 );
+ memcpy( data+length-len, str, len ); // length was already advanced, so the old end is length-len.
+ }
+
+ void clear() { length = 0; } // Forget the contents; the allocation is kept.
+ void upAllocate( int len );
+ void empty();
+
+ char *data;
+ int allocated;
+ int length;
+};
+
+
+struct Scanner
+{
+ Scanner( std::ostream &out )
+ : out(out) { }
+
+ std::ostream &out;
+
+ int line, col;
+ int tokStart;
+ int inlineDepth;
+ int count;
+ Buffer tokBuf;
+ Buffer nonTokBuf;
+
+ void pass(char c) { nonTokBuf.append(c); } // Collect a character into the non-token buffer.
+ void buf(char c) { tokBuf.append(c); } // Collect a character into the token buffer.
+ void token( int id );
+
+ int cs, stack, top;
+
+ // Initialize the machine. Invokes any init statement blocks. Declared
+ // void, so unlike execute() and finish() it reports nothing about
+ // whether the start state is accepting.
+ void init( );
+
+ // Execute the machine on a block of data. Returns -1 if after processing
+ // the data, the machine is in the error state and can never accept, 0 if
+ // the machine is in a non-accepting state and 1 if the machine is in an
+ // accepting state.
+ int execute( char *data, int len );
+
+ // Indicate that there is no more data. Returns -1 if the machine finishes
+ // in the error state and does not accept, 0 if the machine finishes
+ // in any other non-accepting state and 1 if the machine finishes in an
+ // accepting state.
+ int finish( );
+};
+
+#endif
diff --git a/test/cppscan1.rl b/test/cppscan1.rl
new file mode 100644
index 0000000..623af28
--- /dev/null
+++ b/test/cppscan1.rl
@@ -0,0 +1,282 @@
+/*
+ * @LANG: c++
+ * @ALLOW_GENFLAGS: -T0 -T1 -F0 -F1 -G0 -G1 -G2 -P
+ */
+
+#include "cppscan1.h"
+
+%%{
+ machine Scanner;
+ access fsm->;
+
+ action pass { fsm->pass(fc); }
+ action buf { fsm->buf(fc); }
+
+ action emit_slit { fsm->token( TK_Slit ); }
+ action emit_dlit { fsm->token( TK_Dlit ); }
+ action emit_id { fsm->token( TK_Id ); }
+ action emit_integer_decimal { fsm->token( TK_IntegerDecimal ); }
+ action emit_integer_octal { fsm->token( TK_IntegerOctal ); }
+ action emit_integer_hex { fsm->token( TK_IntegerHex ); }
+ action emit_float { fsm->token( TK_Float ); }
+ action emit_symbol { fsm->token( fsm->tokBuf.data[0] ); }
+ action tokst { fsm->tokStart = fsm->col; }
+
+ # Single and double literals.
+ slit = ( 'L'? ( "'" ( [^'\\\n] | /\\./ )* "'" ) $buf ) >tokst %emit_slit;
+ dlit = ( 'L'? ( '"' ( [^"\\\n] | /\\./ )* '"' ) $buf ) >tokst %emit_dlit;
+
+ # Identifiers
+ id = ( [a-zA-Z_] [a-zA-Z0-9_]* ) >tokst $buf %emit_id;
+
+ # Floating literals.
+ fract_const = digit* '.' digit+ | digit+ '.';
+ exponent = [eE] [+\-]? digit+;
+ float_suffix = [flFL];
+ float =
+ ( fract_const exponent? float_suffix? |
+ digit+ exponent float_suffix? ) >tokst $buf %emit_float;
+
+ # Integer decimal. Leading part buffered by float.
+ integer_decimal = ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} $buf ) %emit_integer_decimal;
+
+ # Integer octal. Leading part buffered by float.
+ integer_octal = ( '0' [0-9]+ [ulUL]{0,2} $buf ) %emit_integer_octal;
+
+ # Integer hex. Leading 0 buffered by float.
+ integer_hex = ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) $buf ) %emit_integer_hex;
+
+ # Only buffer the second item, first buffered by symbol.
+ namesep = '::' @buf %{fsm->token( TK_NameSep );};
+ deqs = '==' @buf %{fsm->token( TK_EqualsEquals );};
+ neqs = '!=' @buf %{fsm->token( TK_NotEquals );};
+ and_and = '&&' @buf %{fsm->token( TK_AndAnd );};
+ or_or = '||' @buf %{fsm->token( TK_OrOr );};
+ mult_assign = '*=' @buf %{fsm->token( TK_MultAssign );};
+ percent_assign = '%=' @buf %{fsm->token( TK_PercentAssign );};
+ plus_assign = '+=' @buf %{fsm->token( TK_PlusAssign );};
+ minus_assign = '-=' @buf %{fsm->token( TK_MinusAssign );};
+ amp_assign = '&=' @buf %{fsm->token( TK_AmpAssign );};
+ caret_assign = '^=' @buf %{fsm->token( TK_CaretAssign );};
+ bar_assign = '|=' @buf %{fsm->token( TK_BarAssign );};
+ plus_plus = '++' @buf %{fsm->token( TK_PlusPlus );};
+ minus_minus = '--' @buf %{fsm->token( TK_MinusMinus );};
+ arrow = '->' @buf %{fsm->token( TK_Arrow );};
+ arrow_star = '->*' @buf %{fsm->token( TK_ArrowStar );};
+ dot_star = '.*' @buf %{fsm->token( TK_DotStar );};
+
+ # Buffer both items.
+ div_assign = '/=' @{fsm->buf('/');fsm->buf(fc);} %{fsm->token( TK_DivAssign );};
+
+ # Double dot is sent as two dots.
+ dot_dot = '..' %{fsm->token('.'); fsm->buf('.'); fsm->token('.');};
+
+ # Three char compounds, first item already buffered.
+ dot_dot_dot = '...' %{fsm->buf('.'); fsm->buf('.'); fsm->token( TK_DotDotDot );};
+
+ # All compounds
+ compound = namesep | deqs | neqs | and_and | or_or | mult_assign |
+ div_assign | percent_assign | plus_assign | minus_assign |
+ amp_assign | caret_assign | bar_assign | plus_plus | minus_minus |
+ arrow | arrow_star | dot_star | dot_dot | dot_dot_dot;
+
+ # Single char symbols.
+ symbol =
+ ( punct - [./_"'] ) >tokst $buf %emit_symbol |
+ # Do not immediately buffer slash, may be start of comment.
+ '/' >tokst %{ fsm->buf('/'); fsm->token( '/' ); } |
+ # Dot covered by float.
+ '.' %emit_symbol;
+
+ # Comments and whitespace.
+ commc = '/*' @{fsm->pass('/'); fsm->pass('*');} ( any* $0 '*/' @1 ) $pass;
+ commcc = '//' @{fsm->pass('/'); fsm->pass('/');} ( any* $0 '\n' @1 ) $pass;
+ whitespace = ( any - ( 0 | 33..126 ) )+ $pass;
+
+ action onEOFChar {
+ /* On EOF char, write out the non token buffer. */
+ fsm->nonTokBuf.append(0);
+ cout << fsm->nonTokBuf.data;
+ fsm->nonTokBuf.clear();
+ }
+
+ # Using 0 as eof. As a result all null characters get ignored.
+ EOF = 0 @onEOFChar;
+
+ # All outside code tokens.
+ tokens = (
+ id | slit | dlit | float | integer_decimal |
+ integer_octal | integer_hex | compound | symbol );
+ nontok = ( commc | commcc | whitespace | EOF );
+
+ position = (
+ '\n' @{ fsm->line += 1; fsm->col = 1; } |
+ [^\n] @{ fsm->col += 1; } )*;
+
+ main := ( ( tokens | nontok )** ) & position;
+}%%
+
+%% write data;
+
+void Scanner::init( )
+{
+ Scanner *fsm = this;
+ /* A count of the number of characters in
+ * a token. Used for % sequences. */
+ count = 0;
+ line = 1;
+ col = 1;
+
+ %% write init;
+}
+
+int Scanner::execute( char *data, int len )
+{
+ Scanner *fsm = this;
+ char *p = data;
+ char *pe = data + len;
+
+ %% write exec;
+ if ( cs == Scanner_error )
+ return -1;
+ if ( cs >= Scanner_first_final )
+ return 1;
+ return 0;
+}
+
+int Scanner::finish( )
+{
+ %% write eof;
+ if ( cs == Scanner_error )
+ return -1;
+ if ( cs >= Scanner_first_final )
+ return 1;
+ return 0;
+}
+
+void Scanner::token( int id )
+{
+ /* Leader: null-terminate and print any pending non-token text first. */
+ if ( nonTokBuf.length > 0 ) {
+ nonTokBuf.append(0);
+ cout << nonTokBuf.data;
+ nonTokBuf.clear();
+ }
+
+ /* Token data: print the id in angle brackets, then the buffered text. */
+ tokBuf.append(0);
+ cout << '<' << id << '>' << tokBuf.data;
+ tokBuf.clear();
+}
+
+void Buffer::empty()
+{
+ if ( data != 0 ) {
+ free( data );
+
+ data = 0;
+ length = 0;
+ allocated = 0;
+ }
+}
+
+void Buffer::upAllocate( int len )
+{
+ if ( data == 0 )
+ data = (char*) malloc( len ); // First allocation.
+ else
+ data = (char*) realloc( data, len ); // NOTE(review): result is unchecked; a failed realloc would overwrite data with null.
+ allocated = len; // Capacity is recorded as exactly len bytes.
+}
+
+void test( char *buf )
+{
+ Scanner scanner(cout);
+ scanner.init();
+ scanner.execute( buf, strlen(buf) );
+
+ /* The last token is ignored (because there is no next token). Send
+ * trailing null to force the last token into whitespace. */
+ char eof = 0;
+ if ( scanner.execute( &eof, 1 ) <= 0 ) {
+ cerr << "cppscan: scan failed" << endl;
+ return;
+ }
+ cout.flush();
+}
+
+int main()
+{
+ test(
+ "/*\n"
+ " * Copyright \n"
+ " */\n"
+ "\n"
+ "/* Construct an fsmmachine from a graph. */\n"
+ "RedFsmAp::RedFsmAp( FsmAp *graph, bool complete )\n"
+ ":\n"
+ " graph(graph),\n"
+ "{\n"
+ " assert( sizeof(RedTransAp) <= sizeof(TransAp) );\n"
+ "\n"
+ " reduceMachine();\n"
+ "}\n"
+ "\n"
+ "{\n"
+ " /* Get the transition that we want to extend. */\n"
+ " RedTransAp *extendTrans = list[pos].value;\n"
+ "\n"
+ " /* Look ahead in the transition list. */\n"
+ " for ( int next = pos + 1; next < list.length(); pos++, next++ ) {\n"
+ " if ( ! keyOps->eq( list[pos].highKey, nextKey ) )\n"
+ " break;\n"
+ " }\n"
+ " return false;\n"
+ "}\n"
+ "\n" );
+
+ test(
+ "->*\n"
+ ".*\n"
+ "/*\"*/\n"
+ "\"/*\"\n"
+ "L'\"'\n"
+ "L\"'\"\n" );
+
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+/*
+ * Copyright
+ */
+
+/* Construct an fsmmachine from a graph. */
+<195>RedFsmAp<197>::<195>RedFsmAp<40>( <195>FsmAp <42>*<195>graph<44>, <195>bool <195>complete <41>)
+<58>:
+ <195>graph<40>(<195>graph<41>)<44>,
+<123>{
+ <195>assert<40>( <195>sizeof<40>(<195>RedTransAp<41>) <60><<61>= <195>sizeof<40>(<195>TransAp<41>) <41>)<59>;
+
+ <195>reduceMachine<40>(<41>)<59>;
+<125>}
+
+<123>{
+ /* Get the transition that we want to extend. */
+ <195>RedTransAp <42>*<195>extendTrans <61>= <195>list<91>[<195>pos<93>]<46>.<195>value<59>;
+
+ /* Look ahead in the transition list. */
+ <195>for <40>( <195>int <195>next <61>= <195>pos <43>+ <218>1<59>; <195>next <60>< <195>list<46>.<195>length<40>(<41>)<59>; <195>pos<212>++<44>, <195>next<212>++ <41>) <123>{
+ <195>if <40>( <33>! <195>keyOps<211>-><195>eq<40>( <195>list<91>[<195>pos<93>]<46>.<195>highKey<44>, <195>nextKey <41>) <41>)
+ <195>break<59>;
+ <125>}
+ <195>return <195>false<59>;
+<125>}
+
+<214>->*
+<215>.*
+/*"*/
+<192>"/*"
+<193>L'"'
+<192>L"'"
+#endif
diff --git a/test/cppscan2.rl b/test/cppscan2.rl
new file mode 100644
index 0000000..07fc01b
--- /dev/null
+++ b/test/cppscan2.rl
@@ -0,0 +1,402 @@
+/*
+ * @LANG: c++
+ */
+
+#include <iostream>
+using namespace std;
+
+#define TK_Dlit 192
+#define TK_Slit 193
+#define TK_Float 194
+#define TK_Id 195
+#define TK_NameSep 197
+#define TK_Arrow 211
+#define TK_PlusPlus 212
+#define TK_MinusMinus 213
+#define TK_ArrowStar 214
+#define TK_DotStar 215
+#define TK_ShiftLeft 216
+#define TK_ShiftRight 217
+#define TK_IntegerDecimal 218
+#define TK_IntegerOctal 219
+#define TK_IntegerHex 220
+#define TK_EqualsEquals 223
+#define TK_NotEquals 224
+#define TK_AndAnd 225
+#define TK_OrOr 226
+#define TK_MultAssign 227
+#define TK_DivAssign 228
+#define TK_PercentAssign 229
+#define TK_PlusAssign 230
+#define TK_MinusAssign 231
+#define TK_AmpAssign 232
+#define TK_CaretAssign 233
+#define TK_BarAssign 234
+#define TK_DotDotDot 240
+#define TK_Whitespace 241
+#define TK_Comment 242
+
+#define BUFSIZE 4096
+
+int tok;
+char buf[BUFSIZE], *tokstart, *tokend;
+void token( char *data, int len );
+bool discard = false;
+
+struct Scanner
+{
+ int cs;
+
+ // Initialize the machine. Invokes any init statement blocks and clears
+ // the global tok/tokstart/tokend state. The definition in this file
+ // always returns 1.
+ int init( );
+
+ // Execute the machine on a block of data. Returns -1 if after processing
+ // the data, the machine is in the error state and can never accept, 0 if
+ // the machine is in a non-accepting state and 1 if the machine is in an
+ // accepting state.
+ int execute( char *data, int len );
+
+ // Indicate that there is no more data. Returns -1 if the machine finishes
+ // in the error state and does not accept, 0 if the machine finishes
+ // in any other non-accepting state and 1 if the machine finishes in an
+ // accepting state.
+ int finish( );
+};
+
+%%{
+ machine Scanner;
+
+ # Single and double literals.
+ slit = ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) @{tok = TK_Slit;};
+ dlit = ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) @{tok = TK_Dlit;};
+
+ # Identifiers
+ id = ( [a-zA-Z_] [a-zA-Z0-9_]* ) @{tok = TK_Id;};
+
+ # Floating literals.
+ fract_const = digit* '.' digit+ | digit+ '.';
+ exponent = [eE] [+\-]? digit+;
+ float_suffix = [flFL];
+ float =
+ ( fract_const exponent? float_suffix? |
+ digit+ exponent float_suffix? ) @{tok = TK_Float;};
+
+ # Integer decimal. Leading part buffered by float.
+ integer_decimal = ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) @{tok = TK_IntegerDecimal;};
+
+ # Integer octal. Leading part buffered by float.
+ integer_octal = ( '0' [0-9]+ [ulUL]{0,2} ) @{tok = TK_IntegerOctal;};
+
+ # Integer hex. Leading 0 buffered by float.
+ integer_hex = ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) ) @{tok = TK_IntegerHex;};
+
+ # Only buffer the second item, first buffered by symbol.
+ namesep = '::' @{tok = TK_NameSep;};
+ deqs = '==' @{tok = TK_EqualsEquals;};
+ neqs = '!=' @{tok = TK_NotEquals;};
+ and_and = '&&' @{tok = TK_AndAnd;};
+ or_or = '||' @{tok = TK_OrOr;};
+ mult_assign = '*=' @{tok = TK_MultAssign;};
+ div_assign = '/=' @{tok = TK_DivAssign;};
+ percent_assign = '%=' @{tok = TK_PercentAssign;};
+ plus_assign = '+=' @{tok = TK_PlusAssign;};
+ minus_assign = '-=' @{tok = TK_MinusAssign;};
+ amp_assign = '&=' @{tok = TK_AmpAssign;};
+ caret_assign = '^=' @{tok = TK_CaretAssign;};
+ bar_assign = '|=' @{tok = TK_BarAssign;};
+ plus_plus = '++' @{tok = TK_PlusPlus;};
+ minus_minus = '--' @{tok = TK_MinusMinus;};
+ arrow = '->' @{tok = TK_Arrow;};
+ arrow_star = '->*' @{tok = TK_ArrowStar;};
+ dot_star = '.*' @{tok = TK_DotStar;};
+
+ # Three char compounds, first item already buffered.
+ dot_dot_dot = '...' @{tok = TK_DotDotDot;};
+
+ # All compounds
+ compound = namesep | deqs | neqs | and_and | or_or | mult_assign |
+ div_assign | percent_assign | plus_assign | minus_assign |
+ amp_assign | caret_assign | bar_assign | plus_plus | minus_minus |
+ arrow | arrow_star | dot_star | dot_dot_dot;
+
+ # Single char symbols.
+ symbol = ( punct - [_"'] ) @{tok = fc;};
+
+ action discard {
+ discard = true;
+ }
+
+ # Comments and whitespace.
+ commc = '/*' @discard ( any* $0 '*/' @1 ) @{tok = TK_Comment;};
+ commcc = '//' @discard ( any* $0 '\n' @1 ) @{tok = TK_Comment;};
+ whitespace = ( any - 33..126 )+ >discard @{tok = TK_Whitespace;};
+
+ # All outside code tokens.
+ tokens = (
+ id | slit | dlit | float | integer_decimal |
+ integer_octal | integer_hex | compound | symbol |
+ commc | commcc | whitespace );
+
+ action onError {
+ if ( tok != 0 ) {
+ char *rst_data;
+
+ if ( tok == TK_Comment || tok == TK_Whitespace ) {
+ /* Reset comment status, don't send. */
+ discard = false;
+
+ /* Restart right at the error point if consuming whitespace or
+ * a comment. Consume may have spanned multiple buffers. */
+ rst_data = fpc;
+ }
+ else {
+ /* Send the token. */
+ token( tokstart, tokend - tokstart + 1 );
+
+ /* Restart right after the token. */
+ rst_data = tokend+1;
+ }
+
+ tokstart = 0;
+ fexec rst_data;
+ fgoto main;
+ }
+ }
+
+ main := tokens >{tokstart=fpc;} @{tokend=fpc;} $!onError;
+}%%
+
+%% write data;
+
+int Scanner::init( )
+{
+ tok = 0;
+ tokstart = 0;
+ tokend = 0;
+
+ %% write init;
+ return 1;
+}
+
+int Scanner::execute( char *data, int len )
+{
+ char *p = data;
+ char *pe = data + len;
+
+ %% write exec;
+
+ if ( cs == Scanner_error )
+ return -1;
+ if ( cs >= Scanner_first_final )
+ return 1;
+ return 0;
+}
+
+int Scanner::finish( )
+{
+ %% write eof;
+ if ( cs == Scanner_error )
+ return -1;
+ if ( cs >= Scanner_first_final )
+ return 1;
+ return 0;
+}
+
+
+void token( char *data, int len )
+{
+ cout << "<" << tok << "> ";
+ for ( int i = 0; i < len; i++ )
+ cout << data[i];
+ cout << '\n';
+}
+
+void test( char * data )
+{
+ Scanner scanner;
+ scanner.init();
+ scanner.execute( data, strlen(data) ); // NOTE(review): strlen is used but this file only includes <iostream>.
+ scanner.finish();
+ if ( tok != 0 && tok != TK_Comment && tok != TK_Whitespace ) // A real token is still pending after the input ended.
+ token( tokstart, tokend - tokstart + 1 ); // tokend is set to the token's last character (tokend=fpc), hence the +1.
+}
+
+int main()
+{
+ test(
+ "/*\n"
+ " * Copyright \n"
+ " */\n"
+ "\n"
+ "\n"
+ "/* Move ranges to the singles list. */\n"
+ "void RedFsmAp::move( RedStateAp *state )\n"
+ "{\n"
+ " RedTranst &range = state->outRange;\n"
+ " for ( int rpos = 0; rpos < range.length(); ) {\n"
+ " if ( can( range, rpos ) ) {\n"
+ " while ( range[rpos].value != range[rpos+1].value ) {\n"
+ " single.append( range[rpos+1] );\n"
+ " }\n"
+ " \n"
+ " range[rpos].highKey = range[rpos+1].highKey;\n"
+ " }\n"
+ " else if ( keyOps->span( range[rpos].lowKey, range[rpos].highKey ) == 1 ) {\n"
+ " single.append( range[rpos] );\n"
+ " }\n"
+ " }\n"
+ "}\n"
+ "\n" );
+
+ test(
+ "->*\n"
+ ".*\n"
+ "/*\"*/\n"
+ "\"/*\"\n"
+ "L'\"'\n"
+ "L\"'\"\n"
+ "...\n" );
+}
+
+#ifdef _____OUTPUT_____
+<195> void
+<195> RedFsmAp
+<197> ::
+<195> move
+<40> (
+<195> RedStateAp
+<42> *
+<195> state
+<41> )
+<123> {
+<195> RedTranst
+<38> &
+<195> range
+<61> =
+<195> state
+<211> ->
+<195> outRange
+<59> ;
+<195> for
+<40> (
+<195> int
+<195> rpos
+<61> =
+<218> 0
+<59> ;
+<195> rpos
+<60> <
+<195> range
+<46> .
+<195> length
+<40> (
+<41> )
+<59> ;
+<41> )
+<123> {
+<195> if
+<40> (
+<195> can
+<40> (
+<195> range
+<44> ,
+<195> rpos
+<41> )
+<41> )
+<123> {
+<195> while
+<40> (
+<195> range
+<91> [
+<195> rpos
+<93> ]
+<46> .
+<195> value
+<224> !=
+<195> range
+<91> [
+<195> rpos
+<43> +
+<218> 1
+<93> ]
+<46> .
+<195> value
+<41> )
+<123> {
+<195> single
+<46> .
+<195> append
+<40> (
+<195> range
+<91> [
+<195> rpos
+<43> +
+<218> 1
+<93> ]
+<41> )
+<59> ;
+<125> }
+<195> range
+<91> [
+<195> rpos
+<93> ]
+<46> .
+<195> highKey
+<61> =
+<195> range
+<91> [
+<195> rpos
+<43> +
+<218> 1
+<93> ]
+<46> .
+<195> highKey
+<59> ;
+<125> }
+<195> else
+<195> if
+<40> (
+<195> keyOps
+<211> ->
+<195> span
+<40> (
+<195> range
+<91> [
+<195> rpos
+<93> ]
+<46> .
+<195> lowKey
+<44> ,
+<195> range
+<91> [
+<195> rpos
+<93> ]
+<46> .
+<195> highKey
+<41> )
+<223> ==
+<218> 1
+<41> )
+<123> {
+<195> single
+<46> .
+<195> append
+<40> (
+<195> range
+<91> [
+<195> rpos
+<93> ]
+<41> )
+<59> ;
+<125> }
+<125> }
+<125> }
+<214> ->*
+<215> .*
+<192> "/*"
+<193> L'"'
+<192> L"'"
+<240> ...
+#endif
diff --git a/test/cppscan3.rl b/test/cppscan3.rl
new file mode 100644
index 0000000..afe832d
--- /dev/null
+++ b/test/cppscan3.rl
@@ -0,0 +1,281 @@
+/*
+ * @LANG: c++
+ */
+
+#include <iostream>
+#include <string.h>
+using namespace std;
+
+#define TK_Dlit 192
+#define TK_Slit 193
+#define TK_Float 194
+#define TK_Id 195
+#define TK_NameSep 197
+#define TK_Arrow 211
+#define TK_PlusPlus 212
+#define TK_MinusMinus 213
+#define TK_ArrowStar 214
+#define TK_DotStar 215
+#define TK_ShiftLeft 216
+#define TK_ShiftRight 217
+#define TK_IntegerDecimal 218
+#define TK_IntegerOctal 219
+#define TK_IntegerHex 220
+#define TK_EqualsEquals 223
+#define TK_NotEquals 224
+#define TK_AndAnd 225
+#define TK_OrOr 226
+#define TK_MultAssign 227
+#define TK_DivAssign 228
+#define TK_PercentAssign 229
+#define TK_PlusAssign 230
+#define TK_MinusAssign 231
+#define TK_AmpAssign 232
+#define TK_CaretAssign 233
+#define TK_BarAssign 234
+#define TK_DotDotDot 240
+#define TK_Whitespace 241
+#define TK_Comment 242
+
+#define BUFSIZE 4096
+
+char buf[BUFSIZE];
+
+struct Scanner
+{
+ int cs, act;
+ char *tokstart, *tokend;
+
+ void token( int tok );
+ void run();
+
+ void init( );
+ void execute( char *data, int len );
+ int finish( );
+};
+
+%%{
+ machine Scanner;
+
+ main := |*
+
+ # Single and double literals.
+ ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" )
+ => { token( TK_Slit );};
+ ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' )
+ => { token( TK_Dlit );};
+
+ # Identifiers
+ ( [a-zA-Z_] [a-zA-Z0-9_]* )
+ =>{ token( TK_Id );};
+
+ # Floating literals.
+ fract_const = digit* '.' digit+ | digit+ '.';
+ exponent = [eE] [+\-]? digit+;
+ float_suffix = [flFL];
+
+ ( fract_const exponent? float_suffix? |
+ digit+ exponent float_suffix? )
+ => { token( TK_Float );};
+
+ # Integer decimal. Leading part buffered by float.
+ ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} )
+ => { token( TK_IntegerDecimal );};
+
+ # Integer octal. Leading part buffered by float.
+ ( '0' [0-9]+ [ulUL]{0,2} )
+ => { token( TK_IntegerOctal );};
+
+ # Integer hex. Leading 0 buffered by float.
+ ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) )
+ => { token( TK_IntegerHex );};
+
+ # Only buffer the second item, first buffered by symbol.
+ '::' => {token( TK_NameSep );};
+ '==' => {token( TK_EqualsEquals );};
+ '!=' => {token( TK_NotEquals );};
+ '&&' => {token( TK_AndAnd );};
+ '||' => {token( TK_OrOr );};
+ '*=' => {token( TK_MultAssign );};
+ '/=' => {token( TK_DivAssign );};
+ '%=' => {token( TK_PercentAssign );};
+ '+=' => {token( TK_PlusAssign );};
+ '-=' => {token( TK_MinusAssign );};
+ '&=' => {token( TK_AmpAssign );};
+ '^=' => {token( TK_CaretAssign );};
+ '|=' => {token( TK_BarAssign );};
+ '++' => {token( TK_PlusPlus );};
+ '--' => {token( TK_MinusMinus );};
+ '->' => {token( TK_Arrow );};
+ '->*' => {token( TK_ArrowStar );};
+ '.*' => {token( TK_DotStar );};
+
+ # Three char compounds, first item already buffered.
+ '...' => { token( TK_DotDotDot );};
+
+ # Single char symbols.
+ ( punct - [_"'] ) => { token( tokstart[0] );};
+
+ action comment {
+ token( TK_Comment );
+ }
+
+ # Comments and whitespace.
+ '/*' ( any* $0 '*/' @1 ) => comment;
+ '//' ( any* $0 '\n' @1 ) => comment;
+ ( any - 33..126 )+ => { token( TK_Whitespace );};
+
+ *|;
+}%%
+
+%% write data;
+
+void Scanner::init( )
+{
+ %% write init;
+}
+
+/* Runs the generated scanner over the len bytes starting at data. Returns
+ * nothing; test() inspects cs afterwards to detect the error state. */
+void Scanner::execute( char *data, int len )
+{
+ char *p = data;
+ char *pe = data + len;
+
+ %% write exec;
+}
+
+int Scanner::finish( )
+{
+ %% write eof;
+ if ( cs == Scanner_error )
+ return -1;
+ if ( cs >= Scanner_first_final )
+ return 1;
+ return 0;
+}
+
+
+void Scanner::token( int tok )
+{
+ const char *data = tokstart;
+ int len = tokend - tokstart; // tokend is treated as one past the last character here.
+ cout << "<" << tok << "> "; // Token id in angle brackets, then a space.
+ for ( int i = 0; i < len; i++ )
+ cout << data[i]; // Emit the token text character by character (no terminator is assumed).
+ cout << '\n';
+}
+
+void test( char *buf )
+{
+ int len = strlen( buf );
+ std::ios::sync_with_stdio(false);
+ Scanner scanner;
+ scanner.init();
+
+ scanner.execute( buf, len );
+ if ( scanner.cs == Scanner_error ) {
+ /* Machine failed before finding a token. */
+ cout << "PARSE ERROR" << endl;
+ }
+
+ /* FIXME: Last token may get lost. */
+ scanner.finish();
+}
+
+int main()
+{
+ test(
+ "\"\\\"hi\" /*\n"
+ "*/\n"
+ "44 .44\n"
+ "44. 44\n"
+ "44 . 44\n"
+ "44.44\n"
+ "_hithere22\n"
+ "\n"
+ );
+
+ test(
+ "'\\''\"\\n\\d'\\\"\"\n"
+ "hi\n"
+ "99\n"
+ ".99\n"
+ "99e-4\n"
+ "->*\n"
+ "||\n"
+ "0x98\n"
+ "0x\n"
+ "//\n"
+ "/* * */\n"
+ );
+
+ test(
+ "'\n"
+ "'\n"
+ );
+
+}
+
+#ifdef _____OUTPUT_____
+<192> "\"hi"
+<241>
+<242> /*
+*/
+<241>
+
+<218> 44
+<241>
+<194> .44
+<241>
+
+<194> 44.
+<241>
+<218> 44
+<241>
+
+<218> 44
+<241>
+<46> .
+<241>
+<218> 44
+<241>
+
+<194> 44.44
+<241>
+
+<195> _hithere22
+<193> '\''
+<192> "\n\d'\""
+<241>
+
+<195> hi
+<241>
+
+<218> 99
+<241>
+
+<194> .99
+<241>
+
+<194> 99e-4
+<241>
+
+<214> ->*
+<241>
+
+<226> ||
+<241>
+
+<220> 0x98
+<241>
+
+<218> 0
+<195> x
+<241>
+
+<242> //
+
+<242> /* * */
+PARSE ERROR
+#endif
diff --git a/test/cppscan4.rl b/test/cppscan4.rl
new file mode 100644
index 0000000..fa7499f
--- /dev/null
+++ b/test/cppscan4.rl
@@ -0,0 +1,303 @@
+/*
+ * @LANG: d
+ */
+
+module cppscan;
+
+import std.c.stdio;
+import std.string;
+
+const int BUFSIZE = 2048;
+
+const int TK_Dlit = 192;
+const int TK_Slit = 193;
+const int TK_Float = 194;
+const int TK_Id = 195;
+const int TK_NameSep = 197;
+const int TK_Arrow = 211;
+const int TK_PlusPlus = 212;
+const int TK_MinusMinus = 213;
+const int TK_ArrowStar = 214;
+const int TK_DotStar = 215;
+const int TK_ShiftLeft = 216;
+const int TK_ShiftRight = 217;
+const int TK_IntegerDecimal = 218;
+const int TK_IntegerOctal = 219;
+const int TK_IntegerHex = 220;
+const int TK_EqualsEquals = 223;
+const int TK_NotEquals = 224;
+const int TK_AndAnd = 225;
+const int TK_OrOr = 226;
+const int TK_MultAssign = 227;
+const int TK_DivAssign = 228;
+const int TK_PercentAssign = 229;
+const int TK_PlusAssign = 230;
+const int TK_MinusAssign = 231;
+const int TK_AmpAssign = 232;
+const int TK_CaretAssign = 233;
+const int TK_BarAssign = 234;
+const int TK_DotDotDot = 240;
+
+
+class Scanner
+{
+ int line, col;
+ int tokStart;
+ int inlineDepth;
+ int count;
+ char[] tokBuf;
+ char[] nonTokBuf;
+
+ void pass(char c) { nonTokBuf ~= c; } /* Append c to the pending non-token text. */
+ void buf(char c) { tokBuf ~= c; } /* Append c to the current token text. */
+ /* Emits one token: prints and clears any pending non-token text, then
+ * prints the token id in angle brackets followed by the buffered token
+ * text, and clears the token buffer. */
+ void token( int id )
+ {
+ /* Leader. */
+ if ( nonTokBuf.length > 0 ) {
+ printf("%.*s", nonTokBuf);
+ nonTokBuf = "";
+ }
+
+ /* Token data. */
+ printf("<%d>%.*s", id, tokBuf);
+
+ tokBuf = "";
+ }
+
+ int cs, stack, top;
+
+ %%{
+ machine Scanner;
+
+ action pass { pass(fc); }
+ action buf { buf(fc); }
+
+ action emit_slit { token( TK_Slit ); }
+ action emit_dlit { token( TK_Dlit ); }
+ action emit_id { token( TK_Id ); }
+ action emit_integer_decimal { token( TK_IntegerDecimal ); }
+ action emit_integer_octal { token( TK_IntegerOctal ); }
+ action emit_integer_hex { token( TK_IntegerHex ); }
+ action emit_float { token( TK_Float ); }
+ action emit_symbol { token( tokBuf[0] ); }
+ action tokst { tokStart = col; }
+
+ # Single and double literals.
+ slit = ( 'L'? ( "'" ( [^'\\\n] | /\\./ )* "'" ) $buf ) >tokst %emit_slit;
+ dlit = ( 'L'? ( '"' ( [^"\\\n] | /\\./ )* '"' ) $buf ) >tokst %emit_dlit;
+
+ # Identifiers
+ id = ( [a-zA-Z_] [a-zA-Z0-9_]* ) >tokst $buf %emit_id;
+
+ # Floating literals.
+ fract_const = digit* '.' digit+ | digit+ '.';
+ exponent = [eE] [+\-]? digit+;
+ float_suffix = [flFL];
+ float =
+ ( fract_const exponent? float_suffix? |
+ digit+ exponent float_suffix? ) >tokst $buf %emit_float;
+
+ # Integer decimal. Leading part buffered by float.
+ integer_decimal = ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} $buf ) %emit_integer_decimal;
+
+ # Integer octal. Leading part buffered by float.
+ integer_octal = ( '0' [0-9]+ [ulUL]{0,2} $buf ) %emit_integer_octal;
+
+ # Integer hex. Leading 0 buffered by float.
+ integer_hex = ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) $buf ) %emit_integer_hex;
+
+ # Only buffer the second item, first buffered by symbol.
+ namesep = '::' @buf %{token( TK_NameSep );};
+ deqs = '==' @buf %{token( TK_EqualsEquals );};
+ neqs = '!=' @buf %{token( TK_NotEquals );};
+ and_and = '&&' @buf %{token( TK_AndAnd );};
+ or_or = '||' @buf %{token( TK_OrOr );};
+ mult_assign = '*=' @buf %{token( TK_MultAssign );};
+ percent_assign = '%=' @buf %{token( TK_PercentAssign );};
+ plus_assign = '+=' @buf %{token( TK_PlusAssign );};
+ minus_assign = '-=' @buf %{token( TK_MinusAssign );};
+ amp_assign = '&=' @buf %{token( TK_AmpAssign );};
+ caret_assign = '^=' @buf %{token( TK_CaretAssign );};
+ bar_assign = '|=' @buf %{token( TK_BarAssign );};
+ plus_plus = '++' @buf %{token( TK_PlusPlus );};
+ minus_minus = '--' @buf %{token( TK_MinusMinus );};
+ arrow = '->' @buf %{token( TK_Arrow );};
+ arrow_star = '->*' @buf %{token( TK_ArrowStar );};
+ dot_star = '.*' @buf %{token( TK_DotStar );};
+
+ # Buffer both items.
+ div_assign = '/=' @{buf('/');buf(fc);} %{token( TK_DivAssign );};
+
+ # Double dot is sent as two dots.
+ dot_dot = '..' %{token('.'); buf('.'); token('.');};
+
+ # Three char compounds, first item already buffered.
+ dot_dot_dot = '...' %{buf('.'); buf('.'); token( TK_DotDotDot );};
+
+ # All compounds
+ compound = namesep | deqs | neqs | and_and | or_or | mult_assign |
+ div_assign | percent_assign | plus_assign | minus_assign |
+ amp_assign | caret_assign | bar_assign | plus_plus | minus_minus |
+ arrow | arrow_star | dot_star | dot_dot | dot_dot_dot;
+
+ # Single char symbols.
+ symbol =
+ ( punct - [./_"'] ) >tokst $buf %emit_symbol |
+ # Do not immediately buffer slash, may be start of comment.
+ '/' >tokst %{ buf('/'); token( '/' ); } |
+ # Dot covered by float.
+ '.' %emit_symbol;
+
+ # Comments and whitespace.
+ commc = '/*' @{pass('/'); pass('*');} ( any* $0 '*/' @1 ) $pass;
+ commcc = '//' @{pass('/'); pass('/');} ( any* $0 '\n' @1 ) $pass;
+ whitespace = ( any - ( 0 | 33..126 ) )+ $pass;
+
+ action onEOFChar {
+ /* On EOF char, write out the non token buffer. */
+ printf("%.*s", nonTokBuf);
+ nonTokBuf = "";
+ }
+
+ # Using 0 as eof. As a result all null characters get ignored.
+ EOF = 0 @onEOFChar;
+
+ # All outside code tokens.
+ tokens = (
+ id | slit | dlit | float | integer_decimal |
+ integer_octal | integer_hex | compound | symbol );
+ nontok = ( commc | commcc | whitespace | EOF );
+
+ position = (
+ '\n' @{ line += 1; col = 1; } |
+ [^\n] @{ col += 1; } )*;
+
+ main := ( ( tokens | nontok )** ) & position;
+ }%%
+
+ %% write data noprefix;
+
+ /* Resets the counters and the line/column position, then runs the
+ * Ragel-generated initialization code. Returns nothing: the function is
+ * declared void, so the former `return 1;` has been dropped. */
+ void init( )
+ {
+ /* A count of the number of characters in
+ * a token. Used for % sequences. */
+ count = 0;
+ /* Line and column numbering both start at 1. */
+ line = 1;
+ col = 1;
+ %% write init;
+ }
+
+ /* Runs the generated machine code over _len bytes starting at _data.
+ * Returns -1 if the machine is then in the error state, 1 if its state is
+ * at or above first_final, and 0 otherwise. */
+ int execute( char* _data, int _len )
+ {
+ char *p = _data;
+ char *pe = _data + _len;
+
+ %% write exec;
+
+ if ( cs != error )
+     return ( cs >= first_final ) ? 1 : 0;
+ return -1;
+ }
+
+ // Signals that no more data will arrive. The result is -1 when the
+ // machine ends in the error state, 1 when it ends in an accepting state
+ // and 0 for any other (non-accepting) state.
+ int finish( )
+ {
+ %% write eof;
+
+ if ( cs != error )
+     return ( cs >= first_final ) ? 1 : 0;
+ return -1;
+ }
+};
+
+/* Scans buf with a fresh Scanner, then feeds one null byte as the
+ * end-of-input marker and reports on stderr when that last call does not
+ * return a positive (accepting) result. */
+void test(char[] buf)
+{
+ Scanner scanner = new Scanner();
+ scanner.init();
+ scanner.execute( buf.ptr, buf.length );
+
+ /* The last token is ignored (because there is no next token). Send
+ * trailing null to force the last token into whitespace. */
+ char eof_char = 0;
+ if ( scanner.execute( &eof_char, 1 ) <= 0 ) {
+ fprintf(stderr, "cppscan: scan failed\n");
+ }
+}
+
+int main()
+{
+ test(
+ "/*\n"
+ " * Copyright \n"
+ " */\n"
+ "\n"
+ "RedTransAp *RedFsmAp::reduceTrans( TransAp *trans )\n"
+ "{\n"
+ " RedAction *action = 0;\n"
+ " if ( trans->actionTable.length() > 0 ) {\n"
+ " if ( actionMap.insert( trans->actionTable, &action ) )\n"
+ " action->id = nextActionId++;\n"
+ " }\n"
+ " \n"
+ " RedStateAp *targ = (RedStateAp*)trans->toState;\n"
+ " if ( action == 0 ) {\n"
+ " delete trans;\n"
+ " return 0;\n"
+ " }\n"
+ "\n"
+ " trans->~TransAp();\n"
+ " inDict = new(trans) RedTransAp( targ, action, nextTransId++ );\n"
+ " transSet.insert( inDict );\n"
+ "}\n"
+ );
+
+ test(
+ "->*\n"
+ ".*\n"
+ "/*\"*/\n"
+ "\"/*\"\n"
+ "L'\"'\n"
+ "L\"'\"\n"
+ );
+
+ return 0;
+}
+
+/+ _____OUTPUT_____
+/*
+ * Copyright
+ */
+
+<195>RedTransAp <42>*<195>RedFsmAp<197>::<195>reduceTrans<40>( <195>TransAp <42>*<195>trans <41>)
+<123>{
+ <195>RedAction <42>*<195>action <61>= <218>0<59>;
+ <195>if <40>( <195>trans<211>-><195>actionTable<46>.<195>length<40>(<41>) <62>> <218>0 <41>) <123>{
+ <195>if <40>( <195>actionMap<46>.<195>insert<40>( <195>trans<211>-><195>actionTable<44>, <38>&<195>action <41>) <41>)
+ <195>action<211>-><195>id <61>= <195>nextActionId<212>++<59>;
+ <125>}
+
+ <195>RedStateAp <42>*<195>targ <61>= <40>(<195>RedStateAp<42>*<41>)<195>trans<211>-><195>toState<59>;
+ <195>if <40>( <195>action <223>== <218>0 <41>) <123>{
+ <195>delete <195>trans<59>;
+ <195>return <218>0<59>;
+ <125>}
+
+ <195>trans<211>-><126>~<195>TransAp<40>(<41>)<59>;
+ <195>inDict <61>= <195>new<40>(<195>trans<41>) <195>RedTransAp<40>( <195>targ<44>, <195>action<44>, <195>nextTransId<212>++ <41>)<59>;
+ <195>transSet<46>.<195>insert<40>( <195>inDict <41>)<59>;
+<125>}
+<214>->*
+<215>.*
+/*"*/
+<192>"/*"
+<193>L'"'
+<192>L"'"
++++++++++++++++++/
diff --git a/test/cppscan5.rl b/test/cppscan5.rl
new file mode 100644
index 0000000..3c0035b
--- /dev/null
+++ b/test/cppscan5.rl
@@ -0,0 +1,277 @@
+/*
+ * @LANG: d
+ */
+
+/*
+ * Test in and out state actions.
+ */
+
+import std.c.stdio;
+import std.string;
+
+static const int TK_Dlit = 192;
+static const int TK_Slit = 193;
+static const int TK_Float = 194;
+static const int TK_Id = 195;
+static const int TK_NameSep = 197;
+static const int TK_Arrow = 211;
+static const int TK_PlusPlus = 212;
+static const int TK_MinusMinus = 213;
+static const int TK_ArrowStar = 214;
+static const int TK_DotStar = 215;
+static const int TK_ShiftLeft = 216;
+static const int TK_ShiftRight = 217;
+static const int TK_IntegerDecimal = 218;
+static const int TK_IntegerOctal = 219;
+static const int TK_IntegerHex = 220;
+static const int TK_EqualsEquals = 223;
+static const int TK_NotEquals = 224;
+static const int TK_AndAnd = 225;
+static const int TK_OrOr = 226;
+static const int TK_MultAssign = 227;
+static const int TK_DivAssign = 228;
+static const int TK_PercentAssign = 229;
+static const int TK_PlusAssign = 230;
+static const int TK_MinusAssign = 231;
+static const int TK_AmpAssign = 232;
+static const int TK_CaretAssign = 233;
+static const int TK_BarAssign = 234;
+static const int TK_DotDotDot = 240;
+static const int TK_Whitespace = 241;
+static const int TK_Comment = 242;
+
+class Scanner
+{
+ int cs, act;
+ char *tokstart, tokend;
+
+ /* Prints one token as "<tok> " followed by the bytes in
+ * [tokstart, tokend) and a newline. */
+ void token( int tok )
+ {
+ printf( "<%i> ", tok );
+ for ( char *c = tokstart; c < tokend; c++ )
+     printf( "%c", *c );
+ printf( "\n" );
+ }
+
+ %%{
+
+ machine Scanner;
+
+ main := |*
+
+ # Single and double literals.
+ ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" )
+ => { token( TK_Slit );};
+ ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' )
+ => { token( TK_Dlit );};
+
+ # Identifiers
+ ( [a-zA-Z_] [a-zA-Z0-9_]* )
+ =>{ token( TK_Id );};
+
+ # Floating literals.
+ fract_const = digit* '.' digit+ | digit+ '.';
+ exponent = [eE] [+\-]? digit+;
+ float_suffix = [flFL];
+
+ ( fract_const exponent? float_suffix? |
+ digit+ exponent float_suffix? )
+ => { token( TK_Float );};
+
+ # Integer decimal. Leading part buffered by float.
+ ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} )
+ => { token( TK_IntegerDecimal );};
+
+ # Integer octal. Leading part buffered by float.
+ ( '0' [0-9]+ [ulUL]{0,2} )
+ => { token( TK_IntegerOctal );};
+
+ # Integer hex. Leading 0 buffered by float.
+ ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) )
+ => { token( TK_IntegerHex );};
+
+ # Only buffer the second item, first buffered by symbol.
+ '::' => {token( TK_NameSep );};
+ '==' => {token( TK_EqualsEquals );};
+ '!=' => {token( TK_NotEquals );};
+ '&&' => {token( TK_AndAnd );};
+ '||' => {token( TK_OrOr );};
+ '*=' => {token( TK_MultAssign );};
+ '/=' => {token( TK_DivAssign );};
+ '%=' => {token( TK_PercentAssign );};
+ '+=' => {token( TK_PlusAssign );};
+ '-=' => {token( TK_MinusAssign );};
+ '&=' => {token( TK_AmpAssign );};
+ '^=' => {token( TK_CaretAssign );};
+ '|=' => {token( TK_BarAssign );};
+ '++' => {token( TK_PlusPlus );};
+ '--' => {token( TK_MinusMinus );};
+ '->' => {token( TK_Arrow );};
+ '->*' => {token( TK_ArrowStar );};
+ '.*' => {token( TK_DotStar );};
+
+ # Three char compounds, first item already buffered.
+ '...' => { token( TK_DotDotDot );};
+
+ # Single char symbols.
+ ( punct - [_"'] ) => { token( tokstart[0] );};
+
+ action comment {
+ token( TK_Comment );
+ }
+
+ # Comments and whitespace.
+ '/*' ( any* $0 '*/' @1 ) => comment;
+ '//' ( any* $0 '\n' @1 ) => comment;
+ ( any - 33..126 )+ => { token( TK_Whitespace );};
+
+ *|;
+
+ }%%
+
+ %% write data noprefix;
+
+ /* Prepares the scanner by running the Ragel-generated initialization code. */
+ void init( )
+ {
+ %% write init;
+ }
+
+ /* Runs the generated scanner code over len bytes starting at data. */
+ void execute( char* data, int len )
+ {
+ char *p = data; /* First byte of the block. */
+ char *pe = data + len; /* One past the last byte of the block. */
+
+ %% write exec;
+ }
+
+ // Signals that no more data will arrive. The result is -1 when the
+ // machine ends in the error state, 1 when it ends in an accepting state
+ // and 0 for any other (non-accepting) state.
+ int finish( )
+ {
+ %% write eof;
+
+ if ( cs != error )
+     return ( cs >= first_final ) ? 1 : 0;
+ return -1;
+ }
+};
+
+static const int BUFSIZE = 12;
+
+/* Scans buf with a fresh Scanner in a single execute() call, prints
+ * PARSE ERROR if the machine is left in the error state, then signals end
+ * of input. Declared void, so it returns no value (the former `return 0;`
+ * has been dropped). */
+void test( char buf[] )
+{
+ Scanner scanner = new Scanner();
+ scanner.init();
+
+ scanner.execute( buf.ptr, buf.length );
+ if ( scanner.cs == Scanner.error ) {
+ /* Machine failed before finding a token. */
+ printf("PARSE ERROR\n");
+ }
+ scanner.finish();
+}
+
+int main()
+{
+ test(
+ "\"\\\"hi\" /*\n"
+ "*/\n"
+ "44 .44\n"
+ "44. 44\n"
+ "44 . 44\n"
+ "44.44\n"
+ "_hithere22\n"
+ "\n"
+ );
+
+ test(
+ "'\\''\"\\n\\d'\\\"\"\n"
+ "hi\n"
+ "99\n"
+ ".99\n"
+ "99e-4\n"
+ "->*\n"
+ "||\n"
+ "0x98\n"
+ "0x\n"
+ "//\n"
+ "/* * */\n"
+ );
+
+ test(
+ "'\n"
+ "'\n"
+ );
+
+ return 0;
+}
+
+/+ _____OUTPUT_____
+<192> "\"hi"
+<241>
+<242> /*
+*/
+<241>
+
+<218> 44
+<241>
+<194> .44
+<241>
+
+<194> 44.
+<241>
+<218> 44
+<241>
+
+<218> 44
+<241>
+<46> .
+<241>
+<218> 44
+<241>
+
+<194> 44.44
+<241>
+
+<195> _hithere22
+<193> '\''
+<192> "\n\d'\""
+<241>
+
+<195> hi
+<241>
+
+<218> 99
+<241>
+
+<194> .99
+<241>
+
+<194> 99e-4
+<241>
+
+<214> ->*
+<241>
+
+<226> ||
+<241>
+
+<220> 0x98
+<241>
+
+<218> 0
+<195> x
+<241>
+
+<242> //
+
+<242> /* * */
+PARSE ERROR
++++++++++++++++++++/
diff --git a/test/element1.rl b/test/element1.rl
new file mode 100644
index 0000000..3113058
--- /dev/null
+++ b/test/element1.rl
@@ -0,0 +1,108 @@
+/*
+ * @LANG: c++
+ */
+
+#include <iostream>
+using namespace std;
+
+// One input element for the machine: the integer key the machine matches on
+// (see `getkey fpc->key`) plus a printable name.
+struct LangEl
+{
+ int key;
+ // const: the table in main() initializes this from string literals, which
+ // may not be bound to a plain char* in standard C++.
+ const char *name;
+};
+
+struct Fsm
+{
+ int cs;
+
+ // Initialize the machine. Invokes any init statement blocks. Returns 0
+ // if the machine begins in a non-accepting state and 1 if the machine
+ // begins in an accepting state.
+ int init( );
+
+ // Execute the machine on a block of data. Returns -1 if after processing
+ // the data, the machine is in the error state and can never accept, 0 if
+ // the machine is in a non-accepting state and 1 if the machine is in an
+ // accepting state.
+ int execute( LangEl *data, int len );
+
+ // Indicate that there is no more data. Returns -1 if the machine finishes
+ // in the error state and does not accept, 0 if the machine finishes
+ // in any other non-accepting state and 1 if the machine finishes in an
+ // accepting state.
+ int finish( );
+
+};
+
+%%{
+ machine Fsm;
+
+ alphtype int;
+ getkey fpc->key;
+
+ action a1 {}
+ action a2 {}
+ action a3 {}
+
+ main := ( 1 2* 3 )
+ ${cout << fpc->name << endl;}
+ %/{cout << "accept" << endl;};
+}%%
+
+%% write data;
+
+// Runs the Ragel-generated initialization code. Always returns 0.
+int Fsm::init( )
+{
+ %% write init;
+ return 0;
+}
+
+// Runs the generated machine code over _len elements starting at _data.
+// Returns -1 if the machine is then in the error state, 1 if its state is
+// at or above Fsm_first_final, and 0 otherwise.
+int Fsm::execute( LangEl *_data, int _len )
+{
+ LangEl *p = _data;
+ LangEl *pe = _data + _len;
+ %% write exec;
+
+ if ( cs != Fsm_error )
+     return ( cs >= Fsm_first_final ) ? 1 : 0;
+ return -1;
+}
+
+int Fsm::finish( )
+{
+ %% write eof;
+
+ if ( cs == Fsm_error )
+ return -1;
+ if ( cs >= Fsm_first_final )
+ return 1;
+ return 0;
+}
+
+// Drives the machine once over a fixed table of five elements whose keys
+// are 1, 2, 2, 2, 3.
+int main( )
+{
+ static Fsm fsm;
+ static LangEl lel[] = {
+ {1, "one"},
+ {2, "two-a"},
+ {2, "two-b"},
+ {2, "two-c"},
+ {3, "three"}
+ };
+
+ fsm.init();
+ fsm.execute( lel, 5 ); // 5 == number of entries in lel
+ fsm.finish();
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+one
+two-a
+two-b
+two-c
+three
+accept
+#endif
diff --git a/test/element2.rl b/test/element2.rl
new file mode 100644
index 0000000..55f7610
--- /dev/null
+++ b/test/element2.rl
@@ -0,0 +1,84 @@
+/*
+ * @LANG: c
+ */
+
+#include <stdio.h>
+
+struct LangEl
+{
+ int key;
+ char *name;
+};
+
+struct fsm
+{
+ int cs;
+};
+
+%%{
+ machine fsm;
+ alphtype int;
+ getkey fpc->key;
+ variable curstate fsm->cs;
+
+ action a1 {}
+ action a2 {}
+ action a3 {}
+
+ main := ( 1 2* 3 )
+ ${printf("%s\n", fpc->name);}
+ %/{printf("accept\n");};
+}%%
+
+%% write data;
+
+/* Prepares *fsm by running the Ragel-generated initialization code. */
+void fsm_init( struct fsm *fsm )
+{
+ %% write init;
+}
+
+/* Runs the generated machine code over _len elements starting at _data. */
+void fsm_execute( struct fsm *fsm, struct LangEl *_data, int _len )
+{
+ struct LangEl *p = _data; /* First element of the block. */
+ struct LangEl *pe = _data+_len; /* One past the last element. */
+
+ %% write exec;
+}
+
+int fsm_finish( struct fsm *fsm )
+{
+ %% write eof;
+
+ if ( fsm->cs == fsm_error )
+ return -1;
+ if ( fsm->cs >= fsm_first_final )
+ return 1;
+ return 0;
+}
+
+int main()
+{
+ static struct fsm fsm;
+ static struct LangEl lel[] = {
+ {1, "one"},
+ {2, "two-a"},
+ {2, "two-b"},
+ {2, "two-c"},
+ {3, "three"}
+ };
+
+ fsm_init( &fsm );
+ fsm_execute( &fsm, lel, 5 );
+ fsm_finish( &fsm );
+
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+one
+two-a
+two-b
+two-c
+three
+accept
+#endif
diff --git a/test/element3.rl b/test/element3.rl
new file mode 100644
index 0000000..773e801
--- /dev/null
+++ b/test/element3.rl
@@ -0,0 +1,144 @@
+/*
+ * @LANG: obj-c
+ */
+
+#include <stdio.h>
+#include <objc/Object.h>
+
+struct LangEl
+{
+ int key;
+ char *name;
+};
+
+@interface Fsm : Object
+{
+@public
+ int cs;
+};
+
+// Initialize the machine. Invokes any init statement blocks. Returns 0
+// if the machine begins in a non-accepting state and 1 if the machine
+// begins in an accepting state.
+- (int) initFsm;
+
+// Execute the machine on a block of data. Returns -1 if after processing
+// the data, the machine is in the error state and can never accept, 0 if
+// the machine is in a non-accepting state and 1 if the machine is in an
+// accepting state.
+- (int) executeWithData:( struct LangEl *)data len:(int)len;
+
+// Indicate that there is no more data. Returns -1 if the machine finishes
+// in the error state and does not accept, 0 if the machine finishes
+// in any other non-accepting state and 1 if the machine finishes in an
+// accepting state.
+- (int) finish;
+
+@end;
+
+
+@implementation Fsm
+
+%%{
+ machine Fsm;
+
+ alphtype int;
+ getkey fpc->key;
+
+ action a1 {}
+ action a2 {}
+ action a3 {}
+
+ main := ( 1 2* 3 )
+ ${printf("%s\n", fpc->name);}
+ %/{printf("accept\n");};
+}%%
+
+%% write data;
+
+// Runs the Ragel-generated initialization code. Always returns 0.
+- (int) initFsm;
+{
+ %% write init;
+ return 0;
+}
+
+// Runs the generated machine code over _len elements starting at _data.
+// Returns -1 if the machine is then in the error state, 1 if its state is
+// at or above Fsm_first_final, and 0 otherwise.
+- (int) executeWithData:( struct LangEl *)_data len:(int)_len;
+{
+ struct LangEl *p = _data;
+ struct LangEl *pe = _data + _len;
+ %% write exec;
+
+ if ( self->cs == Fsm_error )
+     return -1;
+ if ( self->cs >= Fsm_first_final )
+     return 1;
+ return 0;
+}
+
+// Runs the generated end-of-input code, then reports -1 for the error
+// state, 1 for a state at or above Fsm_first_final and 0 otherwise.
+- (int) finish;
+{
+ %% write eof;
+ if ( self->cs == Fsm_error )
+     return -1;
+ if ( self->cs >= Fsm_first_final )
+     return 1;
+ return 0;
+}
+
+
+@end
+
+int main()
+{
+ static Fsm *fsm;
+ static struct LangEl lel[] = {
+ {1, "one"},
+ {2, "two-a"},
+ {2, "two-b"},
+ {2, "two-c"},
+ {3, "three"}
+ };
+
+ fsm = [[Fsm alloc] init];
+ [fsm initFsm];
+ [fsm executeWithData:lel len:5];
+ [fsm finish];
+
+ return 0;
+}
+
+@interface Fsm2 : Object
+{
+ // The current state may be read and written to from outside of the
+ // machine. From within action code, curs is -1 and writing to it has no
+ // effect.
+ @public
+ int cs;
+
+ @protected
+
+}
+
+// Execute the machine on a block of data. Returns -1 if after processing
+// the data, the machine is in the error state and can never accept, 0 if
+// the machine is in a non-accepting state and 1 if the machine is in an
+// accepting state.
+- (int)
+executeWithElements:(int) elements
+length:(unsigned)length;
+
+@end
+
+@implementation Fsm2
+- (int)
+executeWithElements:(int)elements
+length:(unsigned)length;
+{
+ return 0;
+}
+@end
+
+#ifdef _____OUTPUT_____
+one
+two-a
+two-b
+two-c
+three
+accept
+#endif
diff --git a/test/eofact.h b/test/eofact.h
new file mode 100644
index 0000000..d547f87
--- /dev/null
+++ b/test/eofact.h
@@ -0,0 +1,9 @@
+#ifndef _EOFACT_H
+#define _EOFACT_H
+
+struct eofact
+{
+ int cs;
+};
+
+#endif
diff --git a/test/eofact.rl b/test/eofact.rl
new file mode 100644
index 0000000..890b73c
--- /dev/null
+++ b/test/eofact.rl
@@ -0,0 +1,50 @@
+/*
+ * @LANG: indep
+ * @ALLOW_GENFLAGS: -T0 -T1 -F0 -F1 -G0 -G1 -G2 -P
+ */
+%%
+%%{
+ machine eofact;
+
+ # Each action only prints its own name so the test can see which fired.
+ action a1 { prints "a1\n"; }
+ action a2 { prints "a2\n"; }
+ action a3 { prints "a3\n"; }
+ action a4 { prints "a4\n"; }
+
+
+ # Going by the expected output below: for each word the @eof action fires
+ # when input ends before the word is complete, and the %eof action fires
+ # when input ends right after the word. 'hello' may be followed by an
+ # optional newline, after which no action fires.
+ main := (
+ 'hello' @eof a1 %eof a2 '\n'? |
+ 'there' @eof a3 %eof a4
+ );
+
+}%%
+/* _____INPUT_____
+""
+"h"
+"hell"
+"hello"
+"hello\n"
+"t"
+"ther"
+"there"
+"friend"
+_____INPUT_____ */
+/* _____OUTPUT_____
+a1
+a3
+FAIL
+a1
+FAIL
+a1
+FAIL
+a2
+ACCEPT
+ACCEPT
+a3
+FAIL
+a3
+FAIL
+a4
+ACCEPT
+FAIL
+_____OUTPUT_____ */
diff --git a/test/erract1.rl b/test/erract1.rl
new file mode 100644
index 0000000..ee0237d
--- /dev/null
+++ b/test/erract1.rl
@@ -0,0 +1,145 @@
+/*
+ * @LANG: c++
+ */
+
+/*
+ * Test error actions.
+ */
+
+#include <iostream>
+#include <stdio.h>
+#include <string.h>
+
+using namespace std;
+
+struct ErrAct
+{
+ int cs;
+
+ // Initialize the machine. Invokes any init statement blocks. Returns 0
+ // if the machine begins in a non-accepting state and 1 if the machine
+ // begins in an accepting state.
+ int init( );
+
+ // Execute the machine on a block of data. Returns -1 if after processing
+ // the data, the machine is in the error state and can never accept, 0 if
+ // the machine is in a non-accepting state and 1 if the machine is in an
+ // accepting state.
+ int execute( const char *data, int len );
+
+ // Indicate that there is no more data. Returns -1 if the machine finishes
+ // in the error state and does not accept, 0 if the machine finishes
+ // in any other non-accepting state and 1 if the machine finishes in an
+ // accepting state.
+ int finish( );
+};
+
+%%{
+ machine ErrAct;
+
+ action expect_digit_plus_minus { printf(" DIGIT PLUS MINUS\n"); }
+ action expect_digit { printf(" DIGIT\n"); }
+ action expect_digit_decimal { printf(" DIGIT DECIMAL\n"); }
+
+ float = (
+ (
+ [\-+] >err expect_digit_plus_minus %err expect_digit |
+ ""
+ )
+ ( [0-9] [0-9]* $err expect_digit_decimal )
+ ( '.' [0-9]+ $err expect_digit )?
+ );
+
+ main := float '\n';
+}%%
+
+%% write data;
+
+// Runs the Ragel-generated initialization code. Always returns 0.
+int ErrAct::init( )
+{
+ %% write init;
+ return 0;
+}
+
+// Runs the generated machine code over _len bytes starting at _data.
+// Returns -1 if the machine is then in the error state, 1 if its state is
+// at or above ErrAct_first_final, and 0 otherwise.
+int ErrAct::execute( const char *_data, int _len )
+{
+ const char *p = _data;
+ const char *pe = _data + _len;
+ %% write exec;
+
+ if ( cs != ErrAct_error )
+     return ( cs >= ErrAct_first_final ) ? 1 : 0;
+ return -1;
+}
+
+int ErrAct::finish( )
+{
+ %% write eof;
+ if ( cs == ErrAct_error )
+ return -1;
+ if ( cs >= ErrAct_first_final )
+ return 1;
+ return 0;
+}
+
+#define BUFSIZE 1024
+
+// Runs one init/execute/finish cycle over the NUL-terminated string buf and
+// prints ACCEPT when finish() returns a positive value, FAIL otherwise.
+void test( char *buf )
+{
+ ErrAct errAct;
+ errAct.init();
+ errAct.execute( buf, strlen(buf) );
+
+ const bool accepted = errAct.finish() > 0;
+ cout << ( accepted ? "ACCEPT" : "FAIL" ) << endl;
+}
+
+int main()
+{
+ test( "1\n" );
+ test( "+1\n" );
+ test( "-1\n" );
+ test( "1.1\n" );
+ test( "+1.1\n" );
+ test( "-1.1\n" );
+ test( "a\n" );
+ test( "-\n" );
+ test( "+\n" );
+ test( "-a\n" );
+ test( "+b\n" );
+ test( "1.\n" );
+ test( "1d\n" );
+ test( "1.d\n" );
+ test( "1.1d\n" );
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+ACCEPT
+ACCEPT
+ACCEPT
+ACCEPT
+ACCEPT
+ACCEPT
+ DIGIT PLUS MINUS
+FAIL
+ DIGIT
+FAIL
+ DIGIT
+FAIL
+ DIGIT
+FAIL
+ DIGIT
+FAIL
+ DIGIT
+FAIL
+ DIGIT DECIMAL
+FAIL
+ DIGIT
+FAIL
+ DIGIT
+FAIL
+#endif
diff --git a/test/erract2.rl b/test/erract2.rl
new file mode 100644
index 0000000..a4d0ef9
--- /dev/null
+++ b/test/erract2.rl
@@ -0,0 +1,80 @@
+/*
+ * @LANG: indep
+ *
+ * Test error actions.
+ */
+%%
+%%{
+ machine ErrAct;
+
+ action err_start { prints "err_start\n"; }
+ action err_all { prints "err_all\n"; }
+ action err_middle { prints "err_middle\n"; }
+ action err_out { prints "err_out\n"; }
+
+ action eof_start { prints "eof_start\n"; }
+ action eof_all { prints "eof_all\n"; }
+ action eof_middle { prints "eof_middle\n"; }
+ action eof_out { prints "eof_out\n"; }
+
+ main := ( 'hello'
+ >err err_start $err err_all <>err err_middle %err err_out
+ >eof eof_start $eof eof_all <>eof eof_middle %eof eof_out
+ ) '\n';
+}%%
+
+/* _____INPUT_____
+""
+"h"
+"x"
+"he"
+"hx"
+"hel"
+"hex"
+"hell"
+"helx"
+"hello"
+"hellx"
+"hello\n"
+"hellox"
+_____INPUT_____ */
+
+/* _____OUTPUT_____
+eof_start
+eof_all
+FAIL
+eof_all
+eof_middle
+FAIL
+err_start
+err_all
+FAIL
+eof_all
+eof_middle
+FAIL
+err_all
+err_middle
+FAIL
+eof_all
+eof_middle
+FAIL
+err_all
+err_middle
+FAIL
+eof_all
+eof_middle
+FAIL
+err_all
+err_middle
+FAIL
+eof_all
+eof_out
+FAIL
+err_all
+err_middle
+FAIL
+ACCEPT
+err_all
+err_out
+FAIL
+_____OUTPUT_____ */
diff --git a/test/erract3.rl b/test/erract3.rl
new file mode 100644
index 0000000..5490b67
--- /dev/null
+++ b/test/erract3.rl
@@ -0,0 +1,105 @@
+/*
+ * @LANG: c
+ */
+
+#include <stdio.h>
+#define IDENT_BUFLEN 256
+
+struct erract
+{
+ int cs;
+};
+
+%%{
+ machine erract;
+ variable curstate fsm->cs;
+
+ # The data that is to go into the fsm structure.
+ action hello_fails { printf("hello fails\n");}
+
+ newline = ( any | '\n' @{printf("newline\n");} )*;
+ hello = 'hello\n'* $lerr hello_fails @eof hello_fails;
+ main := newline | hello;
+}%%
+
+%% write data;
+
+/* Prepares *fsm by running the Ragel-generated initialization code. */
+void erract_init( struct erract *fsm )
+{
+ %% write init;
+}
+
+/* Runs the generated machine code over _len bytes starting at _data. */
+void erract_execute( struct erract *fsm, const char *_data, int _len )
+{
+ const char *p = _data; /* First byte of the block. */
+ const char *pe = _data+_len; /* One past the last byte. */
+ %% write exec;
+}
+
+int erract_finish( struct erract *fsm )
+{
+ %% write eof;
+
+ if ( fsm->cs == erract_error )
+ return -1;
+ else if ( fsm->cs >= erract_first_final )
+ return 1;
+ return 0;
+}
+
+#include <stdio.h>
+#include <string.h>
+
+struct erract fsm;
+
+/* Runs one init/execute/finish cycle on the global fsm over the
+ * NUL-terminated string buf and prints ACCEPT when erract_finish() returns
+ * a positive value, FAIL otherwise. */
+void test( char *buf )
+{
+ int len = strlen(buf);
+ int accepted;
+
+ erract_init( &fsm );
+ erract_execute( &fsm, buf, len );
+ accepted = erract_finish( &fsm ) > 0;
+ fputs( accepted ? "ACCEPT\n" : "FAIL\n", stdout );
+}
+
+int main()
+{
+ test(
+ "hello\n"
+ "hello\n"
+ "hello\n"
+ );
+
+ test(
+ "hello\n"
+ "hello\n"
+ "hello there\n"
+ );
+
+ test(
+ "hello\n"
+ "hello\n"
+ "he" );
+
+ test( "" );
+
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+newline
+newline
+newline
+ACCEPT
+newline
+newline
+hello fails
+newline
+ACCEPT
+newline
+newline
+hello fails
+ACCEPT
+ACCEPT
+#endif
diff --git a/test/erract4.rl b/test/erract4.rl
new file mode 100644
index 0000000..1a753ef
--- /dev/null
+++ b/test/erract4.rl
@@ -0,0 +1,135 @@
+/*
+ * @LANG: obj-c
+ */
+
+#include <stdio.h>
+#include <objc/Object.h>
+
+#define IDENT_BUFLEN 256
+
+@interface ErrAct : Object
+{
+@public
+ int cs;
+};
+
+// Initialize the machine. Invokes any init statement blocks. Returns 0
+// if the machine begins in a non-accepting state and 1 if the machine
+// begins in an accepting state.
+- (int) initFsm;
+
+// Execute the machine on a block of data. This method is declared void and
+// returns no status; call finish afterwards to learn whether the machine
+// ended in the error state, a non-accepting state or an accepting state.
+- (void) executeWithData:(const char *)data len:(int)len;
+
+// Indicate that there is no more data. Returns -1 if the machine finishes
+// in the error state and does not accept, 0 if the machine finishes
+// in any other non-accepting state and 1 if the machine finishes in an
+// accepting state.
+- (int) finish;
+
+@end
+
+@implementation ErrAct
+
+%%{
+ machine ErrAct;
+
+ # The data that is to go into the fsm structure.
+ action hello_fails { printf("hello fails\n");}
+
+ newline = ( any | '\n' @{printf("newline\n");} )*;
+ hello = 'hello\n'* $^hello_fails @/hello_fails;
+ main := newline | hello;
+}%%
+
+%% write data;
+
+// Runs the Ragel-generated initialization code. Always returns 1.
+- (int) initFsm;
+{
+ %% write init;
+ return 1;
+}
+
+// Runs the generated machine code over _len bytes starting at _data.
+- (void) executeWithData:(const char *)_data len:(int)_len;
+{
+ const char *p = _data; // First byte of the block.
+ const char *pe = _data + _len; // One past the last byte.
+ %% write exec;
+}
+
+// Runs the generated end-of-input code, then reports -1 for the error
+// state, 1 for a state at or above ErrAct_first_final and 0 otherwise.
+- (int) finish;
+{
+ %% write eof;
+ if ( cs != ErrAct_error )
+     return ( cs >= ErrAct_first_final ) ? 1 : 0;
+ return -1;
+}
+
+@end
+
+#include <stdio.h>
+#include <string.h>
+#define BUFSIZE 2048
+
+ErrAct *fsm;
+char buf[BUFSIZE];
+
+// Allocates a new ErrAct into the global fsm, runs one
+// initFsm/execute/finish cycle over the NUL-terminated string buf and prints
+// ACCEPT when finish returns a positive value, FAIL otherwise.
+void test( char *buf )
+{
+ int len = strlen(buf);
+ int accepted;
+
+ fsm = [[ErrAct alloc] init];
+ [fsm initFsm];
+ [fsm executeWithData:buf len:len];
+
+ accepted = [fsm finish] > 0;
+ fputs( accepted ? "ACCEPT\n" : "FAIL\n", stdout );
+}
+
+
+int main()
+{
+ test(
+ "hello\n"
+ "hello\n"
+ "hello\n"
+ );
+
+ test(
+ "hello\n"
+ "hello\n"
+ "hello there\n"
+ );
+
+ test(
+ "hello\n"
+ "hello\n"
+ "he" );
+
+ test( "" );
+
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+newline
+newline
+newline
+ACCEPT
+newline
+newline
+hello fails
+newline
+ACCEPT
+newline
+newline
+hello fails
+ACCEPT
+ACCEPT
+#endif
diff --git a/test/erract5.rl b/test/erract5.rl
new file mode 100644
index 0000000..73edec8
--- /dev/null
+++ b/test/erract5.rl
@@ -0,0 +1,146 @@
+/*
+ * @LANG: obj-c
+ */
+
+/*
+ * Test error actions.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <objc/Object.h>
+
+
+@interface ErrAct : Object
+{
+@public
+ int cs;
+};
+
+// Initialize the machine. Invokes any init statement blocks. Returns 0
+// if the machine begins in a non-accepting state and 1 if the machine
+// begins in an accepting state.
+- (int) initFsm;
+
+// Execute the machine on a block of data. This method is declared void and
+// returns no status; call finish afterwards to learn whether the machine
+// ended in the error state, a non-accepting state or an accepting state.
+- (void) executeWithData:(const char *)data len:(int)len;
+
+// Indicate that there is no more data. Returns -1 if the machine finishes
+// in the error state and does not accept, 0 if the machine finishes
+// in any other non-accepting state and 1 if the machine finishes in an
+// accepting state.
+- (int) finish;
+
+@end
+
+@implementation ErrAct
+
+%%{
+ machine ErrAct;
+
+ action expect_digit_plus_minus { printf(" DIGIT PLUS MINUS\n"); }
+ action expect_digit { printf(" DIGIT\n"); }
+ action expect_digit_decimal { printf(" DIGIT DECIMAL\n"); }
+
+ float = (
+ (
+ [\-+] >!expect_digit_plus_minus %!expect_digit |
+ ""
+ )
+ ( [0-9] [0-9]* $!expect_digit_decimal )
+ ( '.' [0-9]+ $!expect_digit )?
+ );
+
+ main := float '\n';
+}%%
+
+%% write data;
+
+// Runs the Ragel-generated initialization code. Always returns 1.
+- (int) initFsm;
+{
+ %% write init;
+ return 1;
+}
+
+// Runs the generated machine code over _len bytes starting at _data.
+- (void) executeWithData:(const char *)_data len:(int)_len;
+{
+ const char *p = _data; // First byte of the block.
+ const char *pe = _data + _len; // One past the last byte.
+ %% write exec;
+}
+
+// Runs the generated end-of-input code, then reports -1 for the error
+// state, 1 for a state at or above ErrAct_first_final and 0 otherwise.
+- (int) finish;
+{
+ %% write eof;
+ if ( cs != ErrAct_error )
+     return ( cs >= ErrAct_first_final ) ? 1 : 0;
+ return -1;
+}
+
+
+@end
+
+#define BUFSIZE 1024
+
+// Allocates a new ErrAct, runs one initFsm/execute/finish cycle over the
+// NUL-terminated string buf and prints ACCEPT when finish returns a
+// positive value, FAIL otherwise.
+void test( char *buf )
+{
+ int accepted;
+ ErrAct *errAct = [[ErrAct alloc] init];
+
+ [errAct initFsm];
+ [errAct executeWithData:buf len:strlen(buf)];
+
+ accepted = [errAct finish] > 0;
+ fputs( accepted ? "ACCEPT\n" : "FAIL\n", stdout );
+}
+
+int main()
+{
+ test( "1\n" );
+ test( "+1\n" );
+ test( "-1\n" );
+ test( "1.1\n" );
+ test( "+1.1\n" );
+ test( "-1.1\n" );
+ test( "a\n" );
+ test( "-\n" );
+ test( "+\n" );
+ test( "-a\n" );
+ test( "+b\n" );
+ test( "1.\n" );
+ test( "1d\n" );
+ test( "1.d\n" );
+ test( "1.1d\n" );
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+ACCEPT
+ACCEPT
+ACCEPT
+ACCEPT
+ACCEPT
+ACCEPT
+ DIGIT PLUS MINUS
+FAIL
+ DIGIT
+FAIL
+ DIGIT
+FAIL
+ DIGIT
+FAIL
+ DIGIT
+FAIL
+ DIGIT
+FAIL
+ DIGIT DECIMAL
+FAIL
+ DIGIT
+FAIL
+ DIGIT
+FAIL
+#endif
diff --git a/test/errintrans.rl b/test/errintrans.rl
new file mode 100644
index 0000000..80b5a5a
--- /dev/null
+++ b/test/errintrans.rl
@@ -0,0 +1,84 @@
+/*
+ * @LANG: c
+ */
+
+/*
+ * Test of a transition going to the error state.
+ */
+
+#include <stdio.h>
+#define BUFSIZE 2048
+
+struct errintrans
+{
+ int cs;
+};
+
+%%{
+ machine errintrans;
+ variable curstate fsm->cs;
+
+ char = any - (digit | '\n');
+ line = char* "\n";
+ main := line+;
+}%%
+
+%% write data;
+
+/* Prepares *fsm by running the Ragel-generated initialization code. */
+void errintrans_init( struct errintrans *fsm )
+{
+ %% write init;
+}
+
+/* Runs the generated machine code over _len bytes starting at _data. */
+void errintrans_execute( struct errintrans *fsm, const char *_data, int _len )
+{
+ const char *p = _data; /* First byte of the block. */
+ const char *pe = _data+_len; /* One past the last byte. */
+
+ %% write exec;
+}
+
+int errintrans_finish( struct errintrans *fsm )
+{
+ %% write eof;
+
+ if ( fsm->cs == errintrans_error )
+ return -1;
+ if ( fsm->cs >= errintrans_first_final )
+ return 1;
+ return 0;
+}
+
+
+struct errintrans fsm;
+#include <string.h>
+
+/* Runs one init/execute/finish cycle on the global fsm over the
+ * NUL-terminated string buf and prints ACCEPT when errintrans_finish()
+ * returns a positive value, FAIL otherwise. */
+void test( char *buf )
+{
+ int len = strlen( buf );
+ int accepted;
+
+ errintrans_init( &fsm );
+ errintrans_execute( &fsm, buf, len );
+ accepted = errintrans_finish( &fsm ) > 0;
+ fputs( accepted ? "ACCEPT\n" : "FAIL\n", stdout );
+}
+
+
+int main()
+{
+ test(
+ "good, does not have numbers\n"
+ );
+
+ test(
+ "bad, has numbers 666\n"
+ );
+
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+ACCEPT
+FAIL
+#endif
diff --git a/test/forder1.rl b/test/forder1.rl
new file mode 100644
index 0000000..a6366cb
--- /dev/null
+++ b/test/forder1.rl
@@ -0,0 +1,100 @@
+/*
+ * @LANG: c
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+/* Machine state: cs is the current-state variable named by "variable curstate". */
+struct forder
+{
+ int cs;
+};
+
+%%{
+ machine forder;
+ variable curstate fsm->cs;
+
+ # 'b' with two entering actions; the expected output shows them running
+ # in the order they are written (b1 before b2).
+ second = 'b'
+ >{printf("enter b1\n");}
+ >{printf("enter b2\n");}
+ ;
+
+ # 'a' with a leaving action and a finishing action; the expected output
+ # shows "finish a" printed before "leave a".
+ first = 'a'
+ %{printf("leave a\n");}
+ @{printf("finish a\n");}
+ ;
+
+ main := first . second . '\n';
+}%%
+
+%% write data;
+
+/* Initialize *fsm; the body is generated by Ragel from the "write init" directive. */
+void forder_init( struct forder *fsm )
+{
+ %% write init;
+}
+
+/*
+ * Run the machine over _len characters starting at _data.
+ * p and pe are not used elsewhere in this function; they are consumed by the
+ * code Ragel generates for "write exec".
+ */
+void forder_execute( struct forder *fsm, const char *_data, int _len )
+{
+ const char *p = _data;
+ const char *pe = _data+_len;
+
+ %% write exec;
+}
+
+/*
+ * Signal end of input. Returns -1 when the machine is in the error state,
+ * 1 when it is in a final state and 0 otherwise.
+ */
+int forder_finish( struct forder *fsm )
+{
+    int status = 0;
+
+    %% write eof;
+
+    if ( fsm->cs == forder_error )
+        status = -1;
+    else if ( fsm->cs >= forder_first_final )
+        status = 1;
+    return status;
+}
+
+struct forder fsm;
+
+/* Run a freshly initialized machine over buf and print ACCEPT or FAIL. */
+void test( char *buf )
+{
+    const char *verdict = "FAIL\n";
+
+    forder_init( &fsm );
+    forder_execute( &fsm, buf, strlen( buf ) );
+    if ( forder_finish( &fsm ) > 0 )
+        verdict = "ACCEPT\n";
+    fputs( verdict, stdout );
+}
+
+int main( void )
+{
+    /* A complete match, a stray character before the newline, and empty input. */
+    test( "ab\n" );
+    test( "abx\n" );
+    test( "" );
+
+    /* A valid line followed by further text. */
+    test( "ab\n" "fail after newline\n" );
+
+    return 0;
+}
+
+#ifdef _____OUTPUT_____
+finish a
+leave a
+enter b1
+enter b2
+ACCEPT
+finish a
+leave a
+enter b1
+enter b2
+FAIL
+FAIL
+finish a
+leave a
+enter b1
+enter b2
+FAIL
+#endif
diff --git a/test/forder2.rl b/test/forder2.rl
new file mode 100644
index 0000000..9592179
--- /dev/null
+++ b/test/forder2.rl
@@ -0,0 +1,135 @@
+/*
+ * @LANG: c
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+/*
+ * After-the-fact starting and ending transitions. They behave like
+ * constructors and destructors in C++.
+ */
+
+/* Machine state: cs is the current-state variable named by "variable curstate". */
+struct forder
+{
+ int cs;
+};
+
+%%{
+ machine forder;
+ variable curstate fsm->cs;
+
+ # The literal 'inner' with entering (>), all-transition ($) and
+ # leaving (%) actions.
+ inner = 'inner'
+ >{printf("enter inner\n");}
+ ${printf("inside inner\n");}
+ %{printf("leave inner\n");}
+ ;
+
+ # The same machine wrapped with a second set of actions. The expected
+ # output shows "enter outter" before "enter inner", but the inner action
+ # first for the "inside" and "leave" pairs.
+ outter = inner
+ >{printf("enter outter\n");}
+ ${printf("inside outter\n");}
+ %{printf("leave outter\n");}
+ ;
+
+ main := outter . '\n';
+}%%
+
+%% write data;
+
+/* Initialize *fsm; the body is generated by Ragel from the "write init" directive. */
+void forder_init( struct forder *fsm )
+{
+ %% write init;
+}
+
+/*
+ * Run the machine over _len characters starting at _data.
+ * p and pe are not used elsewhere in this function; they are consumed by the
+ * code Ragel generates for "write exec".
+ */
+void forder_execute( struct forder *fsm, const char *_data, int _len )
+{
+ const char *p = _data;
+ const char *pe = _data+_len;
+
+ %% write exec;
+}
+
+/*
+ * Signal end of input. Returns -1 when the machine is in the error state,
+ * 1 when it is in a final state and 0 otherwise.
+ */
+int forder_finish( struct forder *fsm )
+{
+    %% write eof;
+
+    if ( fsm->cs == forder_error )
+        return -1;
+    return fsm->cs >= forder_first_final ? 1 : 0;
+}
+
+struct forder fsm;
+
+/* Run a freshly initialized machine over buf and print ACCEPT or FAIL. */
+void test( char *buf )
+{
+    forder_init( &fsm );
+    forder_execute( &fsm, buf, strlen( buf ) );
+
+    if ( forder_finish( &fsm ) <= 0 ) {
+        printf("FAIL\n");
+        return;
+    }
+    printf("ACCEPT\n");
+}
+
+
+int main( void )
+{
+    /* The exact word followed by a newline. */
+    test( "inner\n" );
+
+    /* A valid line followed by further text. */
+    test( "inner\n" "foobar\n" );
+
+    /* Empty input, a bare newline and a truncated word. */
+    test( "" );
+    test( "\n" );
+    test( "inn\n" );
+
+    return 0;
+}
+
+#ifdef _____OUTPUT_____
+enter outter
+enter inner
+inside inner
+inside outter
+inside inner
+inside outter
+inside inner
+inside outter
+inside inner
+inside outter
+inside inner
+inside outter
+leave inner
+leave outter
+ACCEPT
+enter outter
+enter inner
+inside inner
+inside outter
+inside inner
+inside outter
+inside inner
+inside outter
+inside inner
+inside outter
+inside inner
+inside outter
+leave inner
+leave outter
+FAIL
+FAIL
+FAIL
+enter outter
+enter inner
+inside inner
+inside outter
+inside inner
+inside outter
+inside inner
+inside outter
+FAIL
+#endif
diff --git a/test/forder3.rl b/test/forder3.rl
new file mode 100644
index 0000000..7a659bb
--- /dev/null
+++ b/test/forder3.rl
@@ -0,0 +1,106 @@
+/*
+ * @LANG: c
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+/* Machine state: cs is the current-state variable named by "variable curstate". */
+struct forder
+{
+ int cs;
+};
+
+%%{
+ machine forder;
+ variable curstate fsm->cs;
+
+ # m1: an optional run of 'aa' pairs (the empty alternative prints the same
+ # "enter m1 aa" message), then a mandatory 'b', then any 'b's and 'a's.
+ m1 = ( "" %{printf("enter m1 aa\n");} |
+ 'aa'* >{printf("enter m1 aa\n");} %{printf("leave m1 aa\n");} )
+ 'b' @{printf("through m1 b\n");} . 'b'* . 'a'*;
+
+ # m2: groups of three 'b's followed by pairs of 'a's.
+ m2 = 'bbb'* 'aa'*;
+
+ # NOTE: "accpet m2" is spelled this way in the expected output as well;
+ # the two must be changed together if the spelling is ever corrected.
+ main := (
+ m1 %{printf("accept m1\n");} |
+ "" %{printf("enter m2\n");} |
+ m2 >{printf("enter m2\n");} %{printf("accpet m2\n");}
+ ) . '\n';
+}%%
+
+%% write data;
+
+/* Initialize *fsm; the body is generated by Ragel from the "write init" directive. */
+void forder_init( struct forder *fsm )
+{
+ %% write init;
+}
+
+/*
+ * Run the machine over _len characters starting at _data.
+ * p and pe are not used elsewhere in this function; they are consumed by the
+ * code Ragel generates for "write exec".
+ */
+void forder_execute( struct forder *fsm, const char *_data, int _len )
+{
+ const char *p = _data;
+ const char *pe = _data+_len;
+
+ %% write exec;
+}
+
+/*
+ * Signal end of input. Returns -1 when the machine is in the error state,
+ * 1 when it is in a final state and 0 otherwise.
+ */
+int forder_finish( struct forder *fsm )
+{
+    int status = 0;
+
+    %% write eof;
+
+    if ( fsm->cs == forder_error )
+        status = -1;
+    else if ( fsm->cs >= forder_first_final )
+        status = 1;
+    return status;
+}
+
+struct forder fsm;
+
+/* Run a freshly initialized machine over buf and print ACCEPT or FAIL. */
+void test( char *buf )
+{
+    int accepted;
+
+    forder_init( &fsm );
+    forder_execute( &fsm, buf, strlen( buf ) );
+    accepted = forder_finish( &fsm ) > 0;
+    fputs( accepted ? "ACCEPT\n" : "FAIL\n", stdout );
+}
+
+/* Run every sample input through test(), in the order the expected output lists them. */
+int main( void )
+{
+    static char *inputs[] = {
+        "aaaaaabbbaa\n",
+        "\n",
+        "bbbbbbaaaaaaa\n",
+        "bbbbbbaaaaaa\n",
+        "aaaaa\n"
+    };
+    int i;
+
+    for ( i = 0; i < (int)( sizeof(inputs) / sizeof(inputs[0]) ); i++ )
+        test( inputs[i] );
+
+    return 0;
+}
+
+#ifdef _____OUTPUT_____
+enter m1 aa
+enter m2
+leave m1 aa
+through m1 b
+accept m1
+ACCEPT
+enter m2
+accpet m2
+ACCEPT
+enter m1 aa
+leave m1 aa
+through m1 b
+enter m2
+accept m1
+ACCEPT
+enter m1 aa
+leave m1 aa
+through m1 b
+enter m2
+accept m1
+accpet m2
+ACCEPT
+enter m1 aa
+enter m2
+FAIL
+#endif
diff --git a/test/gotocallret1.rl b/test/gotocallret1.rl
new file mode 100644
index 0000000..54626dd
--- /dev/null
+++ b/test/gotocallret1.rl
@@ -0,0 +1,113 @@
+/*
+ * @LANG: indep
+ */
+
+/*
+ * Demonstrate the use of goto, call and return. This machine expects either a
+ * lower case char or a digit as a command then a space followed by the command
+ * arg. If the command is a char, then the arg must be a string of chars.
+ * If the command is a digit, then the arg must be a string of digits. This
+ * choice is determined by action code, rather than through transition
+ * destinations.
+ */
+
+char comm;
+int top;
+int stack[32];
+%%
+%%{
+ machine GotoCallRet;
+
+ # A reference to a state in an unused action caused a segfault in 5.8.
+ action unusedAction { fentry(garble_line); }
+
+ action err_garbling_line { prints "error: garbling line\n"; }
+ action goto_main { fgoto main; }
+ action recovery_failed { prints "error: failed to recover\n"; }
+
+ # Error machine: consumes to the end of the
+ # line, then starts the main machine over.
+ garble_line := ( (any-'\n')*'\n')
+ >err_garbling_line
+ @goto_main
+ $/recovery_failed;
+
+ action hold_and_return {fhold; fret;}
+
+ # Look for a string of alphas or of digits;
+ # on anything else, hold the character and return.
+ alp_comm := alpha+ $!hold_and_return;
+ dig_comm := digit+ $!hold_and_return;
+
+ # Choose which machine to call into based on the command.
+ action comm_arg {
+ if ( comm >= 'a' )
+ fcall alp_comm;
+ else
+ fcall dig_comm;
+ }
+
+ # Specifies the command string. Note that the arg is left out; it is
+ # consumed by the machine that comm_arg calls.
+ command = (
+ [a-z0-9] @{comm = fc;} ' ' @comm_arg '\n'
+ ) @{prints "correct command\n";};
+
+ # Any number of commands. If there is an
+ # error anywhere, garble the line.
+ main := command* $!{fhold;fgoto garble_line;};
+}%%
+/* _____INPUT_____
+"lkajsdf\n"
+"2134\n"
+"(\n"
+"\n"
+"*234234()0909 092 -234aslkf09`1 11\n"
+"1\n"
+"909\n"
+"1 a\n"
+"11 1\n"
+"a 1\n"
+"aa a\n"
+"1 1\n"
+"1 123456\n"
+"a a\n"
+"a abcdef\n"
+"h"
+"a aa1"
+_____INPUT_____ */
+/* _____OUTPUT_____
+error: garbling line
+ACCEPT
+error: garbling line
+ACCEPT
+error: garbling line
+ACCEPT
+error: garbling line
+ACCEPT
+error: garbling line
+ACCEPT
+error: garbling line
+ACCEPT
+error: garbling line
+ACCEPT
+error: garbling line
+ACCEPT
+error: garbling line
+ACCEPT
+error: garbling line
+ACCEPT
+error: garbling line
+ACCEPT
+correct command
+ACCEPT
+correct command
+ACCEPT
+correct command
+ACCEPT
+correct command
+ACCEPT
+FAIL
+error: garbling line
+error: failed to recover
+FAIL
+_____OUTPUT_____ */
diff --git a/test/gotocallret2.rl b/test/gotocallret2.rl
new file mode 100644
index 0000000..4a3bc0e
--- /dev/null
+++ b/test/gotocallret2.rl
@@ -0,0 +1,77 @@
+/*
+ * @LANG: indep
+ */
+
+char comm;
+int top;
+int stack[32];
+ptr tokstart;
+ptr tokend;
+int act;
+int val;
+%%
+%%{
+ machine GotoCallRet;
+
+ sp = ' ';
+
+ # Entered after a word is scanned. Prints "handle ", holds the character
+ # and selects the next machine from val, which the scanner that jumped
+ # here set to 1 (one), 2 (two) or 3 (main).
+ handle := any @{
+ prints "handle ";
+ fhold;
+ if ( val == 1 ) fnext *fentry(one);
+ if ( val == 2 ) fnext *fentry(two);
+ if ( val == 3 ) fnext main;
+ };
+
+ # Scanner used inside '{': calls into one/two on an opening bracket and
+ # returns on '}'. Targets are given indirectly through fentry().
+ one := |*
+ '{' => { prints "{ "; fcall *fentry(one); };
+ "[" => { prints "[ "; fcall *fentry(two); };
+ "}" sp* => { prints "} "; fret; };
+ [a-z]+ => { prints "word "; val = 1; fgoto *fentry(handle); };
+ ' ' => { prints "space "; };
+ *|;
+
+ # Scanner used inside '[': same as one, but returns on ']'.
+ two := |*
+ '{' => { prints "{ "; fcall *fentry(one); };
+ "[" => { prints "[ "; fcall *fentry(two); };
+ ']' sp* => { prints "] "; fret; };
+ [a-z]+ => { prints "word "; val = 2; fgoto *fentry(handle); };
+ ' ' => { prints "space "; };
+ *|;
+
+ # Top-level scanner: same patterns with direct (named) targets, plus a
+ # ' foil' pattern and a newline pattern that has no action.
+ main := |*
+ '{' => { prints "{ "; fcall one; };
+ "[" => { prints "[ "; fcall two; };
+ [a-z]+ => { prints "word "; val = 3; fgoto handle; };
+ [a-z] ' foil' => { prints "this is the foil";};
+ ' ' => { prints "space "; };
+ '\n';
+ *|;
+}%%
+/* _____INPUT_____
+"{a{b[c d]d}c}\n"
+"[a{b[c d]d}c}\n"
+"[a[b]c]d{ef{g{h}i}j}l\n"
+"{{[]}}\n"
+"a b c\n"
+"{a b c}\n"
+"[a b c]\n"
+"{]\n"
+"{{}\n"
+"[[[[[[]]]]]]\n"
+"[[[[[[]]}]]]\n"
+_____INPUT_____ */
+/* _____OUTPUT_____
+{ word handle { word handle [ word handle space word handle ] word handle } word handle } ACCEPT
+[ word handle { word handle [ word handle space word handle ] word handle } word handle FAIL
+[ word handle [ word handle ] word handle ] word handle { word handle { word handle { word handle } word handle } word handle } word handle ACCEPT
+{ { [ ] } } ACCEPT
+word handle space word handle space word handle ACCEPT
+{ word handle space word handle space word handle } ACCEPT
+[ word handle space word handle space word handle ] ACCEPT
+{ FAIL
+{ { } FAIL
+[ [ [ [ [ [ ] ] ] ] ] ] ACCEPT
+[ [ [ [ [ [ ] ] FAIL
+_____OUTPUT_____ */
+
diff --git a/test/high1.rl b/test/high1.rl
new file mode 100644
index 0000000..2ad1b60
--- /dev/null
+++ b/test/high1.rl
@@ -0,0 +1,183 @@
+/*
+ * @LANG: c
+ * @ALLOW_GENFLAGS: -T0 -T1 -G0 -G1 -G2
+ */
+
+/**
+ * Test a high character to make sure signedness
+ * isn't messing us up.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Machine state: cs is the current-state variable named by "variable curstate". */
+struct high
+{
+ int cs;
+};
+
+%%{
+ machine high;
+ variable curstate fsm->cs;
+
+ # Use unsigned int as the alphabet type so that large values can be matched.
+ alphtype unsigned int;
+
+ # The ranges overlap on purpose: the expected output shows a value that
+ # lies in more than one range printing each matching message, and "else"
+ # only for values outside all of them.
+ main := (
+ 0x20 .. 0xefffffff @1 @{printf("gothigh1\n");} |
+ 0xf0000000 @1 @{printf("gothigh1\n");} |
+ 0x200 .. 0xfe000000 @1 @{printf("gothigh2\n");} |
+ any @0 @{printf("else\n");}
+ )*;
+}%%
+
+%% write data;
+
+/* Initialize *fsm; the body is generated by Ragel from the "write init" directive. */
+void high_init( struct high *fsm )
+{
+ %% write init;
+}
+
+/*
+ * Run the machine over _len unsigned int values starting at _data.
+ * p and pe are not used elsewhere in this function; they are consumed by the
+ * code Ragel generates for "write exec".
+ */
+void high_execute( struct high *fsm, const unsigned int *_data, int _len )
+{
+ const unsigned int *p = _data;
+ const unsigned int *pe = _data+_len;
+
+ %% write exec;
+}
+
+/*
+ * Signal end of input. Returns -1 when the machine is in the error state,
+ * 1 when it is in a final state and 0 otherwise.
+ */
+int high_finish( struct high *fsm )
+{
+    %% write eof;
+
+    return fsm->cs == high_error ? -1
+        : ( fsm->cs >= high_first_final ? 1 : 0 );
+}
+
+/* The single instance of the high machine, driven from the tokenizer's actions. */
+struct high high;
+
+#define BUFSIZE 1024
+/* NOTE(review): not referenced by the visible code; test() has a parameter of the same name. */
+char cbuf[BUFSIZE];
+/* Parsed numbers waiting to be passed to high_execute(); buflen counts the used slots. */
+unsigned int buf[BUFSIZE];
+int buflen = 0;
+/* Text of the number currently being read: bufdigit stores at most 8 characters, leaving room for the terminating 0. */
+char numbuf[9];
+int numlen = 0;
+
+/* Tokenizer state: cs is the current-state variable named by "variable curstate". */
+struct tokenizer
+{
+ int cs;
+};
+
+%%{
+ machine tokenizer;
+ variable curstate fsm->cs;
+
+ # Append the current character to numbuf. Only the first 8 characters of
+ # a number are kept; any further ones are dropped.
+ action bufdigit {
+ if ( numlen < 8 )
+ numbuf[numlen++] = fc;
+ }
+
+ # Convert the collected text as base 16, queue the value in buf, and run
+ # the high machine over buf whenever it fills up.
+ action writeDigit {
+ /* Null terminate the buffer storing the number and reset. */
+ numbuf[numlen] = 0;
+ numlen = 0;
+
+ /* Store the number in the buf. If the buf is full then
+ * flush and reset the buffer. */
+ buf[buflen++] = strtoul( numbuf, 0, 16 );
+ if ( buflen == BUFSIZE ) {
+ high_execute( &high, buf, BUFSIZE );
+ buflen = 0;
+ }
+ }
+
+ # Flush whatever is still queued, finish the high machine and print its
+ # verdict. Attached to main with the %/ operator.
+ action finish {
+ if ( buflen > 0 )
+ high_execute( &high, buf, buflen );
+ if ( high_finish( &high ) > 0 )
+ printf("ACCEPT\n");
+ else
+ printf("FAIL\n");
+ }
+
+ # A number is one or more digits or lowercase 'a'..'f'.
+ num = ( digit | 'a'..'f' )+ $bufdigit %writeDigit;
+ main := ( num $1 %0 | space )* %/finish;
+}%%
+
+%% write data;
+
+/* Initialize *fsm; the body is generated by Ragel from the "write init" directive. */
+void tokenizer_init( struct tokenizer *fsm )
+{
+ %% write init;
+}
+
+/*
+ * Run the tokenizer over _len characters starting at _data.
+ * p and pe are not used elsewhere in this function; they are consumed by the
+ * code Ragel generates for "write exec".
+ */
+void tokenizer_execute( struct tokenizer *fsm, const char *_data, int _len )
+{
+ const char *p = _data;
+ const char *pe = _data+_len;
+
+ %% write exec;
+}
+
+/*
+ * Signal end of input to the tokenizer. Returns -1 when it is in the error
+ * state, 1 when it is in a final state and 0 otherwise.
+ */
+int tokenizer_finish( struct tokenizer *fsm )
+{
+    int status = 0;
+
+    %% write eof;
+
+    if ( fsm->cs == tokenizer_error )
+        status = -1;
+    else if ( fsm->cs >= tokenizer_first_final )
+        status = 1;
+    return status;
+}
+
+struct tokenizer tok;
+
+/*
+ * Tokenize the text in cbuf, feeding the parsed numbers to the high machine.
+ * Prints "Tokenizer FAIL" when the tokenizer itself does not accept.
+ */
+void test( char *cbuf )
+{
+    int tokenized;
+
+    high_init( &high );
+    tokenizer_init( &tok );
+    tokenizer_execute( &tok, cbuf, strlen( cbuf ) );
+
+    tokenized = tokenizer_finish( &tok ) > 0;
+    if ( !tokenized )
+        printf("Tokenizer FAIL\n");
+}
+
+char data[] =
+ "10 20 30 40 50 200 300 400 \n"
+ "d0000000 f0000000 fd000000 fe000000\n"
+ "ff000000 ffffffffffffffffffffffffff\n"
+ "ff\n";
+
+/* Run the single hexadecimal sample text through the tokenizer. */
+int main( void )
+{
+    test( data );
+
+    return 0;
+}
+
+#ifdef _____OUTPUT_____
+else
+gothigh1
+gothigh1
+gothigh1
+gothigh1
+gothigh1
+gothigh2
+gothigh1
+gothigh2
+gothigh1
+gothigh2
+gothigh1
+gothigh2
+gothigh1
+gothigh2
+gothigh2
+gothigh2
+else
+else
+gothigh1
+ACCEPT
+#endif
diff --git a/test/high2.rl b/test/high2.rl
new file mode 100644
index 0000000..1aeb9b3
--- /dev/null
+++ b/test/high2.rl
@@ -0,0 +1,104 @@
+/*
+ * @LANG: c++
+ */
+
+/**
+ * Test a high character to make sure signedness
+ * isn't messing us up.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+// State and entry points of the Fsm machine.
+struct Fsm
+{
+ // Current state of the machine.
+ int cs;
+
+ // Initialize the machine. Invokes any init statement blocks. Returns 0
+ // if the machine begins in a non-accepting state and 1 if the machine
+ // begins in an accepting state.
+ // NOTE(review): the implementation in this file always returns 0.
+ int init( );
+
+ // Execute the machine on a block of data. Returns -1 if after processing
+ // the data, the machine is in the error state and can never accept, 0 if
+ // the machine is in a non-accepting state and 1 if the machine is in an
+ // accepting state.
+ int execute( const unsigned char *data, int len );
+
+ // Indicate that there is no more data. Returns -1 if the machine finishes
+ // in the error state and does not accept, 0 if the machine finishes
+ // in any other non-accepting state and 1 if the machine finishes in an
+ // accepting state.
+ int finish( );
+};
+
+%%{
+ machine Fsm;
+
+ # Unsigned alphabet, so that 0xe8 is matched as a character value above 127.
+ alphtype unsigned char;
+
+ # Indicate we got the high character.
+ action gothigh {
+ printf("yes\n");
+ }
+
+ # Exactly the byte 0xe8 followed by a newline.
+ main := 0xe8 @gothigh '\n';
+}%%
+
+%% write data;
+
+// Initialize the machine via the code generated for "write init".
+// Always returns 0.
+int Fsm::init( )
+{
+ %% write init;
+ return 0;
+}
+
+// Run the machine over _len bytes at _data. Returns -1 when the machine is in
+// the error state afterwards, 1 when it is in a final state and 0 otherwise.
+int Fsm::execute( const unsigned char *_data, int _len )
+{
+    const unsigned char *p = _data;
+    const unsigned char *pe = p + _len;
+
+    %% write exec;
+
+    if ( cs == Fsm_error )
+        return -1;
+    return cs >= Fsm_first_final ? 1 : 0;
+}
+
+int Fsm::finish()
+{
+ %% write eof;
+ if ( cs == Fsm_error )
+ return -1;
+ if ( cs >= Fsm_first_final )
+ return 1;
+ return 0;
+}
+
+Fsm fsm;
+
+// Run the machine over len bytes of buf and print ACCEPT or FAIL.
+void test( unsigned char *buf, int len )
+{
+    fsm.init();
+    fsm.execute( buf, len );
+
+    const bool accepted = fsm.finish() > 0;
+    fputs( accepted ? "ACCEPT\n" : "FAIL\n", stdout );
+}
+
+unsigned char data1[] = { 0xe8, 10 };
+unsigned char data2[] = { 0xf8, 10 };
+
+// data1 starts with the byte the machine matches (0xe8); data2 does not.
+int main()
+{
+    test( data1, sizeof(data1) );
+    test( data2, sizeof(data2) );
+
+    return 0;
+}
+
+#ifdef _____OUTPUT_____
+yes
+ACCEPT
+FAIL
+#endif
diff --git a/test/high3.rl b/test/high3.rl
new file mode 100644
index 0000000..03d2a74
--- /dev/null
+++ b/test/high3.rl
@@ -0,0 +1,112 @@
+/*
+ * @LANG: obj-c
+ */
+
+/**
+ * Test a high character to make sure signedness
+ * isn't messing us up.
+ */
+
+#include <stdio.h>
+#include <objc/Object.h>
+
+// Objective-C wrapper around the Fsm machine; cs is the current state.
+@interface Fsm : Object
+{
+@public
+ int cs;
+};
+
+// Initialize the machine. Invokes any init statement blocks.
+// NOTE(review): the implementation in this file always returns 1, whatever
+// the start state is; the caller in this file ignores the result.
+- (int) initFsm;
+
+// Execute the machine on a block of data. Returns nothing; the outcome is
+// reported by finish.
+- (void) executeWithData:(const unsigned char *)data len:(int)len;
+
+// Indicate that there is no more data. Returns -1 if the machine finishes
+// in the error state and does not accept, 0 if the machine finishes
+// in any other non-accepting state and 1 if the machine finishes in an
+// accepting state.
+- (int) finish;
+
+@end
+
+@implementation Fsm
+
+%%{
+ machine Fsm;
+
+ # Unsigned alphabet, so that 0xe8 is matched as a character value above 127.
+ alphtype unsigned char;
+
+ # Indicate we got the high character.
+ action gothigh {
+ printf("yes\n");
+ }
+
+ # Exactly the byte 0xe8 followed by a newline.
+ main := 0xe8 @gothigh '\n';
+}%%
+
+%% write data;
+
+// Initialize the machine via the code generated for "write init".
+// Always returns 1.
+- (int) initFsm;
+{
+ %% write init;
+ return 1;
+}
+
+// Run the machine over _len bytes at _data. p and pe are consumed by the
+// code generated for "write exec".
+- (void) executeWithData:(const unsigned char *)_data len:(int)_len;
+{
+ const unsigned char *p = _data;
+ const unsigned char *pe = _data + _len;
+ %% write exec;
+}
+
+// Signal end of input. Returns -1 in the error state, 1 in a final state
+// and 0 otherwise.
+- (int) finish;
+{
+ %% write eof;
+ if ( cs == Fsm_error )
+ return -1;
+ else if ( cs >= Fsm_first_final )
+ return 1;
+ return 0;
+}
+
+
+@end
+
+
+#define BUFSIZE 2048
+
+Fsm *fsm;
+unsigned char buf[BUFSIZE];
+
+/*
+ * Run a fresh Fsm over len bytes of buf and print ACCEPT or FAIL.
+ * A new object is allocated on every call, so it is released before
+ * returning; previously each call leaked one instance.
+ */
+void test( unsigned char *buf, int len )
+{
+    fsm = [[Fsm alloc] init];
+    [fsm initFsm];
+    [fsm executeWithData:buf len:len];
+    if ( [fsm finish] > 0 )
+        printf("ACCEPT\n");
+    else
+        printf("FAIL\n");
+
+    /* Release the instance and clear the global so no stale pointer remains. */
+    [fsm free];
+    fsm = nil;
+}
+
+unsigned char data1[] = { 0xe8, 10 };
+unsigned char data2[] = { 0xf8, 10 };
+
+// data1 starts with the byte the machine matches (0xe8); data2 does not.
+int main()
+{
+    test( data1, sizeof(data1) );
+    test( data2, sizeof(data2) );
+
+    return 0;
+}
+
+#ifdef _____OUTPUT_____
+yes
+ACCEPT
+FAIL
+#endif
diff --git a/test/include1.rl b/test/include1.rl
new file mode 100644
index 0000000..30145de
--- /dev/null
+++ b/test/include1.rl
@@ -0,0 +1,28 @@
+/*
+ * @LANG: c
+ * @IGNORE: yes
+ *
+ * Provides definitions for include tests.
+ */
+
+%%{
+ machine include_test_1;
+
+ # Named actions; a1 and b1 below refer to them by name.
+ action A {printf(" a1");}
+ action B {printf(" b1");}
+
+ # Defined here but not referenced anywhere in this file.
+ action NonRef1 {printf(" nr1");}
+
+ a1 = 'a' @A;
+ b1 = 'b' @B;
+}%%
+
+%%{
+ machine include_test_2;
+
+ # Defined here but not referenced anywhere in this file.
+ action NonRef2 {printf(" nr2");}
+
+ # Same shape as a1/b1 in include_test_1, with the actions written inline.
+ a2 = 'a' @{printf(" a2");};
+ b2 = 'b' @{printf(" b2");};
+}%%
+
diff --git a/test/include2.rl b/test/include2.rl
new file mode 100644
index 0000000..68ab007
--- /dev/null
+++ b/test/include2.rl
@@ -0,0 +1,52 @@
+/*
+ * @LANG: c
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+%%{
+ machine include_test_4;
+
+ # Not referenced inside this machine; the including machine below uses it.
+ action NonRef3 {printf(" nr3");}
+
+ a3 = 'a'@{printf(" a3");};
+ b3 = 'b'@{printf(" b3");};
+
+}%%
+
+%%{
+ machine include_test_1;
+
+ # Three include forms: file name only, machine name plus file name, and
+ # machine name only.
+ include "include1.rl";
+
+ include include_test_2 "include1.rl";
+
+ include include_test_4;
+
+ # Uses one definition pair and one otherwise unreferenced action from
+ # each included machine, then stops on a 0 byte.
+ main :=
+ a1 b1 @NonRef1
+ a2 b2 @NonRef2
+ a3 b3 @NonRef3
+ 0 @{fbreak;};
+}%%
+
+%% write data;
+
+/*
+ * Run the machine over the string at p and end the output line.
+ * No pe is declared here: exec is written with "noend", and the machine
+ * stops itself with fbreak on a 0 byte.
+ */
+void test( char *p )
+{
+ int cs;
+ %% write init;
+ %% write exec noend;
+ printf("\n");
+}
+
+/* One a/b pair for each of the three included machines. */
+int main( void )
+{
+    test( "ababab" );
+
+    return 0;
+}
+
+#ifdef _____OUTPUT_____
+ a1 b1 nr1 a2 b2 nr2 a3 b3 nr3
+#endif
diff --git a/test/java1.rl b/test/java1.rl
new file mode 100644
index 0000000..128386f
--- /dev/null
+++ b/test/java1.rl
@@ -0,0 +1,49 @@
+/*
+ * @LANG: java
+ * @ALLOW_GENFLAGS: -T0
+ */
+
+// Java test of fentry() and an indirect fgoto inside an action.
+class java1
+{
+ %%{
+ machine java1;
+
+ one := 'one\n';
+ two := 'two\n';
+ four := 'four\n';
+
+ # After the greeting line the action always jumps to the entry of "one",
+ # so only input continuing with "one\n" is accepted (see expected output).
+ main :=
+ ( 'hello' | 'there' | 'friend' )
+ '\n' @{int s = fentry(one); fgoto *s; char c = fc;}
+ ( 'one' | 'two' | 'four' ) '\n';
+ }%%
+
+ %% write data;
+
+ // Run the machine over data and print ACCEPT or FAIL.
+ static void test( char data[] )
+ {
+ int cs, p = 0, pe = data.length;
+ // NOTE(review): top is not used by any visible code in this method.
+ int top;
+
+ %% write init;
+ %% write exec;
+
+ if ( cs >= java1_first_final )
+ System.out.println( "ACCEPT" );
+ else
+ System.out.println( "FAIL" );
+ }
+
+ public static void main( String args[] )
+ {
+ test( "hello\none\n".toCharArray() );
+ test( "there\ntwo\n".toCharArray() );
+ test( "friend\nfour\n".toCharArray() );
+ }
+}
+
+/* _____OUTPUT_____
+ACCEPT
+FAIL
+FAIL
+*/
diff --git a/test/java2.rl b/test/java2.rl
new file mode 100644
index 0000000..61d9ac9
--- /dev/null
+++ b/test/java2.rl
@@ -0,0 +1,51 @@
+/*
+ * @LANG: java
+ * @ALLOW_GENFLAGS: -T0
+ */
+
+// Java test of an int alphabet, including one large value (1073741824).
+class java2
+{
+ %%{
+ # NOTE(review): the machine is named java1 although the class is java2;
+ # test() refers to java1_first_final accordingly.
+ machine java1;
+ alphtype int;
+
+ # 1 2 3 4, then one of two four-value runs, then the large value.
+ main := 1 2 3 4 (
+ 5 6 7 8 |
+ 9 10 11 12
+ ) 1073741824;
+
+ }%%
+
+ %% write data;
+
+ // Run the machine over data and print ACCEPT or FAIL.
+ static void test( int data[] )
+ {
+ int cs, p = 0, pe = data.length;
+ // NOTE(review): top is not used by any visible code in this method.
+ int top;
+
+ %% write init;
+ %% write exec;
+
+ if ( cs >= java1_first_final )
+ System.out.println( "ACCEPT" );
+ else
+ System.out.println( "FAIL" );
+ }
+
+ // t1 and t2 take the two alternatives; t3 skips the middle run entirely.
+ static final int t1[] = { 1, 2, 3, 4, 5, 6, 7, 8, 1073741824 };
+ static final int t2[] = { 1, 2, 3, 4, 9, 10, 11, 12, 1073741824 };
+ static final int t3[] = { 1, 2, 3, 4, 1073741824 };
+
+ public static void main( String args[] )
+ {
+ test( t1 );
+ test( t2 );
+ test( t3 );
+ }
+}
+
+/* _____OUTPUT_____
+ACCEPT
+ACCEPT
+FAIL
+*/
diff --git a/test/keller1.rl b/test/keller1.rl
new file mode 100644
index 0000000..94d25b7
--- /dev/null
+++ b/test/keller1.rl
@@ -0,0 +1,1076 @@
+/*
+ * @LANG: c++
+ */
+
+/*
+ * Automatically generated by keller. Do not edit.
+ *
+ * Parts of this file are copied from Keller source covered by the GNU
+ * GPL. As a special exception, you may use the parts of this file copied
+ * from Keller source without restriction. The remainder is derived from
+ * "tmp.gmr" and inherits the copyright status of that file.
+ */
+
+#line 1 "tmp.gmr"
+#include <iostream>
+using std::cout;
+using std::endl;
+
+
+#line 16 "tmp.rl"
+enum token_type_e {
+ tt_id,
+ tt_equals,
+ tt_semi,
+ tt_pipe,
+ tt_amp,
+ tt_minus,
+ tt_dot,
+ tt_colon,
+ tt_percent,
+ tt_dollar,
+ tt_plus,
+ tt_number,
+ tt_star,
+ tt_question,
+ tt_not,
+ tt_andFSM,
+ tt_orFSM,
+ tt_open,
+ tt_close
+};
+
+struct LangEl
+{
+ int line, lineEnd;
+ int pos;
+
+ int type;
+ int state;
+ LangEl *prev, *next;
+};
+
+struct Token : public LangEl
+{
+ const char *value;
+};
+
+struct Lel_start : public LangEl
+{
+#line 32 "tmp.gmr"
+
+ int si;
+#line 59 "tmp.rl"
+};
+
+struct Lel_M : public LangEl
+{
+#line 36 "tmp.gmr"
+
+ int mi;
+#line 67 "tmp.rl"
+};
+
+#define l__error 19
+#define l_tt_id 0
+#define l_tt_equals 1
+#define l_tt_semi 2
+#define l_tt_pipe 3
+#define l_tt_amp 4
+#define l_tt_minus 5
+#define l_tt_dot 6
+#define l_tt_colon 7
+#define l_tt_percent 8
+#define l_tt_dollar 9
+#define l_tt_plus 10
+#define l_tt_number 11
+#define l_tt_star 12
+#define l_tt_question 13
+#define l_tt_not 14
+#define l_tt_andFSM 15
+#define l_tt_orFSM 16
+#define l_tt_open 17
+#define l_tt_close 18
+#define l_start 23
+#define l_M 24
+#define l_A 25
+#define l_E 26
+#define l_T 27
+#define l_N 28
+#define l_K 29
+#define l_F 30
+#define l__start 31
+#define l__eof 20
+
+struct LangEl;
+
+struct Parser
+{
+ Parser();
+
+ void parseLangEl( LangEl *langEl );
+ int done( );
+
+ void push( LangEl *lel ) {
+ lel->prev = stack;
+ stack = lel;
+ }
+ LangEl *pop() {
+ LangEl *ret = stack;
+ stack = stack->prev;
+ return ret;
+ }
+ int pop( int n );
+ void rem( LangEl *lel, int n );
+ LangEl *stack;
+ int next;
+ LangEl *redLel;
+ LangEl *rhs[10];
+
+ int cs;
+
+ // Initialize the machine. Invokes any init statement blocks. Returns 0
+ // if the machine begins in a non-accepting state and 1 if the machine
+ // begins in an accepting state.
+ int init( );
+
+ // Execute the machine on a block of data. Returns -1 if after processing
+ // the data, the machine is in the error state and can never accept, 0 if
+ // the machine is in a non-accepting state and 1 if the machine is in an
+ // accepting state.
+ int execute( LangEl *data, int len );
+
+ // Indicate that there is no more data. Returns -1 if the machine finishes
+ // in the error state and does not accept, 0 if the machine finishes
+ // in any other non-accepting state and 1 if the machine finishes in an
+ // accepting state.
+ int finish( );
+};
+
+
+%%{
+ machine Parser;
+
+ getkey fpc->type;
+
+ action shift {
+ fpc->state = fcurs;
+ push( fpc );
+ }
+
+ action pop1 { fnext *pop(1); }
+ action pop2 { fnext *pop(2); }
+ action pop3 { fnext *pop(3); }
+ action pop4 { fnext *pop(4); }
+
+ action new_error {
+ redLel = new LangEl();
+ redLel->type = 19;
+ }
+
+ action newstart {
+ redLel = new Lel_start();
+ redLel->type = 23;
+ }
+
+ action newM {
+ redLel = new Lel_M();
+ redLel->type = 24;
+ }
+
+ action newA {
+ redLel = new LangEl();
+ redLel->type = 25;
+ }
+
+ action newE {
+ redLel = new LangEl();
+ redLel->type = 26;
+ }
+
+ action newT {
+ redLel = new LangEl();
+ redLel->type = 27;
+ }
+
+ action newN {
+ redLel = new LangEl();
+ redLel->type = 28;
+ }
+
+ action newK {
+ redLel = new LangEl();
+ redLel->type = 29;
+ }
+
+ action newF {
+ redLel = new LangEl();
+ redLel->type = 30;
+ }
+
+ action new_eof {
+ redLel = new LangEl();
+ redLel->type = 20;
+ }
+
+ action new_epsilon {
+ redLel = new LangEl();
+ redLel->type = 21;
+ }
+
+ action new_null {
+ redLel = new LangEl();
+ redLel->type = 22;
+ }
+
+ action rem1 { rem(fpc, 1); }
+ action rem2 { rem(fpc, 2); }
+ action rem3 { rem(fpc, 3); }
+ action rem4 { rem(fpc, 4); }
+
+ action r_start_0
+ {
+#line 41 "tmp.gmr"
+
+ cout << "start = M;" << endl;
+ static_cast<Lel_start*>(redLel)->si = static_cast<Lel_M*>(rhs[0])->mi;
+
+#line 214 "tmp.rl"
+ }
+
+ action r_M_0
+ {
+#line 44 "tmp.gmr"
+ cout << "M = M A;" << endl;
+#line 221 "tmp.rl"
+ }
+
+ action r_M_1
+ {
+#line 45 "tmp.gmr"
+ cout << "M = A;" << endl;
+#line 228 "tmp.rl"
+ }
+
+ action r_A_0
+ {
+#line 46 "tmp.gmr"
+ cout << "A = tt_id tt_equals E tt_semi;" << endl;
+#line 235 "tmp.rl"
+ }
+
+ action r_E_0
+ {
+#line 47 "tmp.gmr"
+ cout << "E = E tt_pipe T;" << endl;
+#line 242 "tmp.rl"
+ }
+
+ action r_E_1
+ {
+#line 48 "tmp.gmr"
+ cout << "E = E tt_amp T;" << endl;
+#line 249 "tmp.rl"
+ }
+
+ action r_E_2
+ {
+#line 49 "tmp.gmr"
+ cout << "E = E tt_minus T;" << endl;
+#line 256 "tmp.rl"
+ }
+
+ action r_E_3
+ {
+#line 50 "tmp.gmr"
+ cout << "E = T;" << endl;
+#line 263 "tmp.rl"
+ }
+
+ action r_T_0
+ {
+#line 51 "tmp.gmr"
+ cout << "T = T tt_dot N;" << endl;
+#line 270 "tmp.rl"
+ }
+
+ action r_T_1
+ {
+#line 52 "tmp.gmr"
+ cout << "T = T N;" << endl;
+#line 277 "tmp.rl"
+ }
+
+ action r_T_2
+ {
+#line 53 "tmp.gmr"
+ cout << "T = N;" << endl;
+#line 284 "tmp.rl"
+ }
+
+ action r_N_0
+ {
+#line 54 "tmp.gmr"
+ cout << "N = N tt_colon tt_id;" << endl;
+#line 291 "tmp.rl"
+ }
+
+ action r_N_1
+ {
+#line 55 "tmp.gmr"
+ cout << "N = N tt_percent tt_id;" << endl;
+#line 298 "tmp.rl"
+ }
+
+ action r_N_2
+ {
+#line 56 "tmp.gmr"
+ cout << "N = N tt_dollar tt_id;" << endl;
+#line 305 "tmp.rl"
+ }
+
+ action r_N_3
+ {
+#line 57 "tmp.gmr"
+ cout << "N = N tt_colon tt_plus tt_number;" << endl;
+#line 312 "tmp.rl"
+ }
+
+ action r_N_4
+ {
+#line 58 "tmp.gmr"
+ cout << "N = N tt_colon tt_minus tt_number;" << endl;
+#line 319 "tmp.rl"
+ }
+
+ action r_N_5
+ {
+#line 59 "tmp.gmr"
+ cout << "N = N tt_percent tt_plus tt_number;" << endl;
+#line 326 "tmp.rl"
+ }
+
+ action r_N_6
+ {
+#line 60 "tmp.gmr"
+ cout << "N = N tt_percent tt_minus tt_number;" << endl;
+#line 333 "tmp.rl"
+ }
+
+ action r_N_7
+ {
+#line 61 "tmp.gmr"
+ cout << "N = N tt_dollar tt_plus tt_number;" << endl;
+#line 340 "tmp.rl"
+ }
+
+ action r_N_8
+ {
+#line 62 "tmp.gmr"
+ cout << "N = N tt_dollar tt_minus tt_number;" << endl;
+#line 347 "tmp.rl"
+ }
+
+ action r_N_9
+ {
+#line 63 "tmp.gmr"
+ cout << "N = K;" << endl;
+#line 354 "tmp.rl"
+ }
+
+ action r_K_0
+ {
+#line 64 "tmp.gmr"
+ cout << "K = F tt_star;" << endl;
+#line 361 "tmp.rl"
+ }
+
+ action r_K_1
+ {
+#line 65 "tmp.gmr"
+ cout << "K = F tt_question;" << endl;
+#line 368 "tmp.rl"
+ }
+
+ action r_K_2
+ {
+#line 66 "tmp.gmr"
+ cout << "K = F tt_plus;" << endl;
+#line 375 "tmp.rl"
+ }
+
+ action r_K_3
+ {
+#line 67 "tmp.gmr"
+ cout << "K = F;" << endl;
+#line 382 "tmp.rl"
+ }
+
+ action r_K_4
+ {
+#line 68 "tmp.gmr"
+ cout << "K = tt_not F tt_star;" << endl;
+#line 389 "tmp.rl"
+ }
+
+ action r_K_5
+ {
+#line 69 "tmp.gmr"
+ cout << "K = tt_not F tt_question;" << endl;
+#line 396 "tmp.rl"
+ }
+
+ action r_K_6
+ {
+#line 70 "tmp.gmr"
+ cout << "K = tt_not F tt_plus;" << endl;
+#line 403 "tmp.rl"
+ }
+
+ action r_K_7
+ {
+#line 71 "tmp.gmr"
+ cout << "K = tt_not F;" << endl;
+#line 410 "tmp.rl"
+ }
+
+ action r_F_0
+ {
+#line 72 "tmp.gmr"
+ cout << "F = tt_andFSM;" << endl;
+#line 417 "tmp.rl"
+ }
+
+ action r_F_1
+ {
+#line 73 "tmp.gmr"
+ cout << "F = tt_orFSM;" << endl;
+#line 424 "tmp.rl"
+ }
+
+ action r_F_2
+ {
+#line 74 "tmp.gmr"
+ cout << "F = tt_id;" << endl;
+#line 431 "tmp.rl"
+ }
+
+ action r_F_3
+ {
+#line 75 "tmp.gmr"
+ cout << "F = tt_open E tt_close;" << endl;
+#line 438 "tmp.rl"
+ }
+
+ main :=
+ s0: start: (
+ 23 @shift -> s1 |
+ 25 @shift -> s3 |
+ 24 @shift -> s4 |
+ 0 @shift -> s5
+ ),
+ s1: (
+ 20 @shift -> s54
+ ),
+ s2: (
+ (0|20) @pop2 @newM @r_M_0 @rem2 -> s54
+ ),
+ s3: (
+ (0|20) @pop1 @newM @r_M_1 @rem1 -> s54
+ ),
+ s4: (
+ 20 @pop1 @newstart @r_start_0 @rem1 -> s54 |
+ 25 @shift -> s2 |
+ 0 @shift -> s5
+ ),
+ s5: (
+ 1 @shift -> s6
+ ),
+ s6: (
+ 26 @shift -> s8 |
+ 27 @shift -> s9 |
+ 29 @shift -> s25 |
+ 28 @shift -> s26 |
+ 30 @shift -> s33 |
+ 17 @shift -> s35 |
+ 14 @shift -> s46 |
+ 15 @shift -> s48 |
+ 16 @shift -> s49 |
+ 0 @shift -> s50
+ ),
+ s7: (
+ (0|20) @pop4 @newA @r_A_0 @rem4 -> s54
+ ),
+ s8: (
+ 2 @shift -> s7 |
+ 3 @shift -> s37 |
+ 4 @shift -> s38 |
+ 5 @shift -> s39
+ ),
+ s9: (
+ (2..5|18) @pop1 @newE @r_E_3 @rem1 -> s54 |
+ 29 @shift -> s25 |
+ 30 @shift -> s33 |
+ 28 @shift -> s34 |
+ 17 @shift -> s35 |
+ 6 @shift -> s41 |
+ 14 @shift -> s46 |
+ 15 @shift -> s48 |
+ 16 @shift -> s49 |
+ 0 @shift -> s50
+ ),
+ s10: (
+ (0|2..9|14..18) @pop3 @newN @r_N_0 @rem3 -> s54
+ ),
+ s11: (
+ (0|2..9|14..18) @pop3 @newN @r_N_1 @rem3 -> s54
+ ),
+ s12: (
+ (0|2..9|14..18) @pop3 @newN @r_N_2 @rem3 -> s54
+ ),
+ s13: (
+ 11 @shift -> s14
+ ),
+ s14: (
+ (0|2..9|14..18) @pop4 @newN @r_N_3 @rem4 -> s54
+ ),
+ s15: (
+ 11 @shift -> s16
+ ),
+ s16: (
+ (0|2..9|14..18) @pop4 @newN @r_N_4 @rem4 -> s54
+ ),
+ s17: (
+ 11 @shift -> s18
+ ),
+ s18: (
+ (0|2..9|14..18) @pop4 @newN @r_N_5 @rem4 -> s54
+ ),
+ s19: (
+ 11 @shift -> s20
+ ),
+ s20: (
+ (0|2..9|14..18) @pop4 @newN @r_N_6 @rem4 -> s54
+ ),
+ s21: (
+ 11 @shift -> s22
+ ),
+ s22: (
+ (0|2..9|14..18) @pop4 @newN @r_N_7 @rem4 -> s54
+ ),
+ s23: (
+ 11 @shift -> s24
+ ),
+ s24: (
+ (0|2..9|14..18) @pop4 @newN @r_N_8 @rem4 -> s54
+ ),
+ s25: (
+ (0|2..9|14..18) @pop1 @newN @r_N_9 @rem1 -> s54
+ ),
+ s26: (
+ (0|2..6|14..18) @pop1 @newT @r_T_2 @rem1 -> s54 |
+ 7 @shift -> s27 |
+ 8 @shift -> s28 |
+ 9 @shift -> s29
+ ),
+ s27: (
+ 0 @shift -> s10 |
+ 10 @shift -> s13 |
+ 5 @shift -> s15
+ ),
+ s28: (
+ 0 @shift -> s11 |
+ 10 @shift -> s17 |
+ 5 @shift -> s19
+ ),
+ s29: (
+ 0 @shift -> s12 |
+ 10 @shift -> s21 |
+ 5 @shift -> s23
+ ),
+ s30: (
+ (0|2..9|14..18) @pop2 @newK @r_K_0 @rem2 -> s54
+ ),
+ s31: (
+ (0|2..9|14..18) @pop2 @newK @r_K_1 @rem2 -> s54
+ ),
+ s32: (
+ (0|2..9|14..18) @pop2 @newK @r_K_2 @rem2 -> s54
+ ),
+ s33: (
+ (0|2..9|14..18) @pop1 @newK @r_K_3 @rem1 -> s54 |
+ 12 @shift -> s30 |
+ 13 @shift -> s31 |
+ 10 @shift -> s32
+ ),
+ s34: (
+ (0|2..6|14..18) @pop2 @newT @r_T_1 @rem2 -> s54 |
+ 7 @shift -> s27 |
+ 8 @shift -> s28 |
+ 9 @shift -> s29
+ ),
+ s35: (
+ 27 @shift -> s9 |
+ 29 @shift -> s25 |
+ 28 @shift -> s26 |
+ 30 @shift -> s33 |
+ 17 @shift -> s35 |
+ 26 @shift -> s40 |
+ 14 @shift -> s46 |
+ 15 @shift -> s48 |
+ 16 @shift -> s49 |
+ 0 @shift -> s50
+ ),
+ s36: (
+ (0|2..10|12..18) @pop3 @newF @r_F_3 @rem3 -> s54
+ ),
+ s37: (
+ 29 @shift -> s25 |
+ 28 @shift -> s26 |
+ 30 @shift -> s33 |
+ 17 @shift -> s35 |
+ 14 @shift -> s46 |
+ 15 @shift -> s48 |
+ 16 @shift -> s49 |
+ 0 @shift -> s50 |
+ 27 @shift -> s53
+ ),
+ s38: (
+ 29 @shift -> s25 |
+ 28 @shift -> s26 |
+ 30 @shift -> s33 |
+ 17 @shift -> s35 |
+ 14 @shift -> s46 |
+ 15 @shift -> s48 |
+ 16 @shift -> s49 |
+ 0 @shift -> s50 |
+ 27 @shift -> s52
+ ),
+ s39: (
+ 29 @shift -> s25 |
+ 28 @shift -> s26 |
+ 30 @shift -> s33 |
+ 17 @shift -> s35 |
+ 27 @shift -> s42 |
+ 14 @shift -> s46 |
+ 15 @shift -> s48 |
+ 16 @shift -> s49 |
+ 0 @shift -> s50
+ ),
+ s40: (
+ 18 @shift -> s36 |
+ 3 @shift -> s37 |
+ 4 @shift -> s38 |
+ 5 @shift -> s39
+ ),
+ s41: (
+ 29 @shift -> s25 |
+ 30 @shift -> s33 |
+ 17 @shift -> s35 |
+ 14 @shift -> s46 |
+ 15 @shift -> s48 |
+ 16 @shift -> s49 |
+ 0 @shift -> s50 |
+ 28 @shift -> s51
+ ),
+ s42: (
+ (2..5|18) @pop3 @newE @r_E_2 @rem3 -> s54 |
+ 29 @shift -> s25 |
+ 30 @shift -> s33 |
+ 28 @shift -> s34 |
+ 17 @shift -> s35 |
+ 6 @shift -> s41 |
+ 14 @shift -> s46 |
+ 15 @shift -> s48 |
+ 16 @shift -> s49 |
+ 0 @shift -> s50
+ ),
+ s43: (
+ (0|2..9|14..18) @pop3 @newK @r_K_4 @rem3 -> s54
+ ),
+ s44: (
+ (0|2..9|14..18) @pop3 @newK @r_K_5 @rem3 -> s54
+ ),
+ s45: (
+ (0|2..9|14..18) @pop3 @newK @r_K_6 @rem3 -> s54
+ ),
+ s46: (
+ 17 @shift -> s35 |
+ 30 @shift -> s47 |
+ 15 @shift -> s48 |
+ 16 @shift -> s49 |
+ 0 @shift -> s50
+ ),
+ s47: (
+ (0|2..9|14..18) @pop2 @newK @r_K_7 @rem2 -> s54 |
+ 12 @shift -> s43 |
+ 13 @shift -> s44 |
+ 10 @shift -> s45
+ ),
+ s48: (
+ (0|2..10|12..18) @pop1 @newF @r_F_0 @rem1 -> s54
+ ),
+ s49: (
+ (0|2..10|12..18) @pop1 @newF @r_F_1 @rem1 -> s54
+ ),
+ s50: (
+ (0|2..10|12..18) @pop1 @newF @r_F_2 @rem1 -> s54
+ ),
+ s51: (
+ (0|2..6|14..18) @pop3 @newT @r_T_0 @rem3 -> s54 |
+ 7 @shift -> s27 |
+ 8 @shift -> s28 |
+ 9 @shift -> s29
+ ),
+ s52: (
+ (2..5|18) @pop3 @newE @r_E_1 @rem3 -> s54 |
+ 29 @shift -> s25 |
+ 30 @shift -> s33 |
+ 28 @shift -> s34 |
+ 17 @shift -> s35 |
+ 6 @shift -> s41 |
+ 14 @shift -> s46 |
+ 15 @shift -> s48 |
+ 16 @shift -> s49 |
+ 0 @shift -> s50
+ ),
+ s53: (
+ (2..5|18) @pop3 @newE @r_E_0 @rem3 -> s54 |
+ 29 @shift -> s25 |
+ 30 @shift -> s33 |
+ 28 @shift -> s34 |
+ 17 @shift -> s35 |
+ 6 @shift -> s41 |
+ 14 @shift -> s46 |
+ 15 @shift -> s48 |
+ 16 @shift -> s49 |
+ 0 @shift -> s50
+ ),
+ s54: (
+ '' -> final
+ )
+ ;
+}%%
+
+%% write data;
+
+Parser::Parser( )
+{ }
+
+int Parser::init( )  // Runs the code Ragel emits for the "write init" directive; always returns 0.
+{
+ %% write init;
+ return 0;
+}
+
+int Parser::execute( LangEl *_data, int _len )  // Feed _len elements starting at _data to the machine; returns -1, 1 or 0.
+{
+ LangEl *p = _data;  // p and pe are the names Ragel's generated exec code expects
+ LangEl *pe = _data+_len;  // one past the last element
+ %% write exec;
+ if ( cs == Parser_error )
+ return -1;  // machine is in its error state
+ if ( cs >= Parser_first_final )
+ return 1;  // machine is currently in a final state
+ return 0;  // neither in error nor in a final state
+}
+
+int Parser::finish( )  // Run the code emitted for "write eof", then report the machine's status.
+{
+ %% write eof;
+ if ( cs == Parser_error )
+ return -1;  // error state
+ if ( cs >= Parser_first_final )
+ return 1;  // ended in a final state
+ return 0;  // ended in a state that is neither error nor final
+}
+
+void Parser::parseLangEl( LangEl *lel )  // Run one element through the machine, re-offering it after every pending redLel.
+{
+ redLel = 0;  // cleared so the loop only reacts to a value set during the execute() below
+ execute( lel, 1 );
+ while ( redLel != 0 ) {  // presumably set by the machine's reduce actions (defined outside this function) -- confirm
+ execute( redLel, 1 );  // feed the element left in redLel
+ redLel = 0;
+ execute( lel, 1 );  // then offer the original element again
+ }
+}
+
+int Parser::pop( int n )  // Pop n elements into rhs[0..n-1] and return rhs[0]->state.
+{
+ for ( int i = n-1; i >= 0; i-- )  // filled back to front: the first element popped lands in rhs[n-1]
+ rhs[i] = pop();
+ return rhs[0]->state;  // NOTE(review): for n <= 0 nothing is popped and rhs[0] is whatever was stored earlier
+}
+
+void Parser::rem( LangEl *lel, int n )  // Delete rhs[n-1] down to rhs[0]; the lel parameter is not used.
+{
+ for ( int i = n-1; i >= 0; i-- )
+ delete rhs[i];  // the rhs slots are not reset after deletion
+}
+
+int Parser::done( )  // Feed a synthetic l__eof token through the parser, then return finish()'s result.
+{
+ Token *eof = new Token;  // not deleted in this function
+ eof->type = l__eof;
+ eof->line = 0;
+ eof->pos = 0;
+ parseLangEl( eof );
+ return finish();
+}
+
+#line 77 "tmp.gmr"
+
+
+#include <assert.h>
+#define MAX_TOKS 10000
+
+struct TokList  // Fixed-capacity list of heap-allocated tokens; declares no destructor.
+{
+ TokList() : numToks(0) { }  // starts empty
+
+ void append( int type );
+ int parse();
+
+ Token *toks[MAX_TOKS];  // only the first numToks entries are assigned
+ int numToks;  // number of tokens appended so far
+};
+
+void TokList::append( int type )  // Append a newly allocated Token of the given type.
+{
+ assert( numToks < MAX_TOKS );  // capacity check; has no effect when NDEBUG is defined
+ toks[numToks] = new Token;
+ toks[numToks]->type = type;  // only the type field is assigned here
+ numToks += 1;
+}
+
+int TokList::parse()  // Run every stored token through a fresh Parser and return its done() result.
+{
+ Parser parser;
+ parser.init();
+ for ( int i = 0; i < numToks; i++ )
+ parser.parseLangEl( toks[i] );  // tokens are fed in the order they were appended
+ return parser.done();
+}
+
+void test0()  // Tokens: id equals id star minus andFSM dot id semi, then id equals id andFSM id semi.
+{
+ TokList tokList;
+ tokList.append( tt_id );
+ tokList.append( tt_equals );
+ tokList.append( tt_id );
+ tokList.append( tt_star );
+ tokList.append( tt_minus );
+ tokList.append( tt_andFSM );
+ tokList.append( tt_dot );
+ tokList.append( tt_id );
+ tokList.append( tt_semi );  // end of the first semi-terminated group
+ tokList.append( tt_id );
+ tokList.append( tt_equals );
+ tokList.append( tt_id );
+ tokList.append( tt_andFSM );
+ tokList.append( tt_id );
+ tokList.append( tt_semi );
+ cout << tokList.parse() << endl;  // prints parse()'s return value
+}
+
+void test1()  // Tokens: id equals open orFSM minus andFSM close star semi.
+{
+ TokList tokList;
+ tokList.append( tt_id );
+ tokList.append( tt_equals );
+ tokList.append( tt_open );
+ tokList.append( tt_orFSM );
+ tokList.append( tt_minus );
+ tokList.append( tt_andFSM );
+ tokList.append( tt_close );
+ tokList.append( tt_star );
+ tokList.append( tt_semi );
+ cout << tokList.parse() << endl;  // prints parse()'s return value
+}
+void test2()  // Tokens: id equals not open orFSM minus not andFSM close star semi.
+{
+ TokList tokList;
+ tokList.append( tt_id );
+ tokList.append( tt_equals );
+ tokList.append( tt_not );
+ tokList.append( tt_open );
+ tokList.append( tt_orFSM );
+ tokList.append( tt_minus );
+ tokList.append( tt_not );
+ tokList.append( tt_andFSM );
+ tokList.append( tt_close );
+ tokList.append( tt_star );
+ tokList.append( tt_semi );
+ cout << tokList.parse() << endl;  // prints parse()'s return value
+}
+void test3()  // Four ids, each followed by colon/dollar/percent and an id or a signed number, ending in semi.
+{
+ TokList tokList;
+ tokList.append( tt_id );
+ tokList.append( tt_equals );
+ tokList.append( tt_id );
+ tokList.append( tt_colon );
+ tokList.append( tt_minus );
+ tokList.append( tt_number );
+ tokList.append( tt_id );
+ tokList.append( tt_colon );
+ tokList.append( tt_id );
+ tokList.append( tt_id );
+ tokList.append( tt_dollar );
+ tokList.append( tt_plus );
+ tokList.append( tt_number );
+ tokList.append( tt_id );
+ tokList.append( tt_percent );
+ tokList.append( tt_minus );
+ tokList.append( tt_number );
+ tokList.append( tt_semi );
+ cout << tokList.parse() << endl;  // prints parse()'s return value
+}
+void test4()  // Tokens: id equals id pipe id amp id minus id semi.
+{
+ TokList tokList;
+ tokList.append( tt_id );
+ tokList.append( tt_equals );
+ tokList.append( tt_id );
+ tokList.append( tt_pipe );
+ tokList.append( tt_id );
+ tokList.append( tt_amp );
+ tokList.append( tt_id );
+ tokList.append( tt_minus );
+ tokList.append( tt_id );
+ tokList.append( tt_semi );
+ cout << tokList.parse() << endl;  // prints parse()'s return value
+}
+
+int main()  // Runs test0 through test4 in order; each prints one parse result line.
+{
+ test0();
+ test1();
+ test2();
+ test3();
+ test4();
+}
+
+#ifdef _____OUTPUT_____
+F = tt_id;
+K = F tt_star;
+N = K;
+T = N;
+E = T;
+F = tt_andFSM;
+K = F;
+N = K;
+T = N;
+F = tt_id;
+K = F;
+N = K;
+T = T tt_dot N;
+E = E tt_minus T;
+A = tt_id tt_equals E tt_semi;
+M = A;
+F = tt_id;
+K = F;
+N = K;
+T = N;
+F = tt_andFSM;
+K = F;
+N = K;
+T = T N;
+F = tt_id;
+K = F;
+N = K;
+T = T N;
+E = T;
+A = tt_id tt_equals E tt_semi;
+M = M A;
+start = M;
+1
+F = tt_orFSM;
+K = F;
+N = K;
+T = N;
+E = T;
+F = tt_andFSM;
+K = F;
+N = K;
+T = N;
+E = E tt_minus T;
+F = tt_open E tt_close;
+K = F tt_star;
+N = K;
+T = N;
+E = T;
+A = tt_id tt_equals E tt_semi;
+M = A;
+start = M;
+1
+F = tt_orFSM;
+K = F;
+N = K;
+T = N;
+E = T;
+F = tt_andFSM;
+K = tt_not F;
+N = K;
+T = N;
+E = E tt_minus T;
+F = tt_open E tt_close;
+K = tt_not F tt_star;
+N = K;
+T = N;
+E = T;
+A = tt_id tt_equals E tt_semi;
+M = A;
+start = M;
+1
+F = tt_id;
+K = F;
+N = K;
+N = N tt_colon tt_minus tt_number;
+T = N;
+F = tt_id;
+K = F;
+N = K;
+N = N tt_colon tt_id;
+T = T N;
+F = tt_id;
+K = F;
+N = K;
+N = N tt_dollar tt_plus tt_number;
+T = T N;
+F = tt_id;
+K = F;
+N = K;
+N = N tt_percent tt_minus tt_number;
+T = T N;
+E = T;
+A = tt_id tt_equals E tt_semi;
+M = A;
+start = M;
+1
+F = tt_id;
+K = F;
+N = K;
+T = N;
+E = T;
+F = tt_id;
+K = F;
+N = K;
+T = N;
+E = E tt_pipe T;
+F = tt_id;
+K = F;
+N = K;
+T = N;
+E = E tt_amp T;
+F = tt_id;
+K = F;
+N = K;
+T = N;
+E = E tt_minus T;
+A = tt_id tt_equals E tt_semi;
+M = A;
+start = M;
+1
+#endif
diff --git a/test/langtrans_c.sh b/test/langtrans_c.sh
new file mode 100755
index 0000000..7d9cf41
--- /dev/null
+++ b/test/langtrans_c.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+#
+
+file=$1
+# Quoted: with no argument an unquoted test collapses to `[ -f ]`, which is true.
+[ -f "$file" ] || exit 1
+
+# Get the machine name.
+machine=`sed -n 's/^[\t ]*machine[\t ]*\([a-zA-Z_0-9]*\)[\t ]*;[\t ]*$/\1/p' $file`
+
+# Make a temporary version of the test case using the C language translations.
+sed -n '/\/\*/,/\*\//d;p' $file | txl -q stdin langtrans_c.txl > $file.pr
+
+# Begin writing out the test case.
+cat << EOF
+/*
+ * @LANG: c
+ * @GENERATED: yes
+ */
+#include <string.h>
+#include <stdio.h>
+EOF
+
+# Write the data declarations
+sed -n '/^%%$/q;p' $file.pr
+
+# Write out the machine specification.
+sed -n '/^%%{$/,/^}%%/p' $file.pr
+
+# Write out the init and execute routines.
+cat << EOF
+int cs;
+%% write data;
+void init()
+{
+EOF
+
+sed -n '1,/^%%$/d; /^%%{$/q; {s/^/\t/;p}' $file.pr
+
+cat << EOF
+ %% write init;
+}
+
+void exec( char *data, int len )
+{
+ char *p = data;
+ char *pe = data + len;
+ %% write exec;
+}
+
+void finish( )
+{
+ %% write eof;
+ if ( cs >= ${machine}_first_final )
+ printf( "ACCEPT\\n" );
+ else
+ printf( "FAIL\\n" );
+}
+EOF
+
+# Write out the test data.
+sed -n '1,/\/\* _____INPUT_____/d; /_____INPUT_____ \*\//q; p;' $file | awk '
+BEGIN {
+ print "char *inp[] = {"
+}
+{
+ print " " $0 ","
+}
+END {
+ print "};"
+ print ""
+ print "int inplen = " NR ";"
+}'
+
+# Write out the main routine.
+cat << EOF
+
+int main( )
+{
+ int i;
+ for ( i = 0; i < inplen; i++ ) {
+ init();
+ exec( inp[i], strlen(inp[i]) );
+ finish();
+ }
+ return 0;
+}
+#ifdef _____OUTPUT_____
+EOF
+
+# Write out the expected output.
+sed -n '1,/\/\* _____OUTPUT_____/d; /_____OUTPUT_____ \*\//q; p;' $file
+echo "#endif"
+
+# Don't need this language-specific file anymore.
+rm $file.pr
diff --git a/test/langtrans_c.txl b/test/langtrans_c.txl
new file mode 100644
index 0000000..831350c
--- /dev/null
+++ b/test/langtrans_c.txl
@@ -0,0 +1,277 @@
+include "testcase.txl"
+
+define c_statements
+ [repeat c_lang_stmt]
+end define
+
+define c_lang_stmt
+ [al_ragel_stmt]
+ | [c_variable_decl]
+ | [c_expr_stmt]
+ | [c_if_stmt]
+ | [EX] '{ [IN] [NL] [c_statements] [EX] '} [IN] [NL]
+end define
+
+define c_variable_decl
+ [c_type_decl] [id] [opt union] '; [NL]
+end define
+
+define c_type_decl
+ [al_type_decl]
+ | 'char '*
+end define
+
+define c_expr_stmt
+ [c_expr] '; [NL]
+end define
+
+define c_expr
+ [c_term] [repeat c_expr_extend]
+end define
+
+define c_expr_extend
+ [al_expr_op] [c_term]
+end define
+
+define c_term
+ [al_term]
+ | [id] '( [c_args] ')
+end define
+
+define c_args
+ [list c_expr]
+end define
+
+define c_sign
+ '- | '+
+end define
+
+define c_if_stmt
+ 'if '( [c_expr] ') [NL] [IN]
+ [c_lang_stmt] [EX]
+ [opt c_else]
+end define
+
+define c_else
+ 'else [NL] [IN]
+ [c_lang_stmt] [EX]
+end define
+
+define c_lang
+ [c_statements]
+ '%% [NL]
+ [c_statements]
+ [ragel_def]
+end define
+
+define program
+ [lang_indep]
+ | [c_lang]
+end define
+
+redefine al_host_block
+ '{ [NL] [IN] [al_statements] [EX] '} [NL]
+ | '{ [NL] [IN] [c_statements] [EX] '} [NL]
+end define
+
+rule boolTypes
+ replace [al_type_decl]
+ 'bool
+ by
+ 'int
+end rule
+
+rule ptrTypes
+ replace [c_type_decl]
+ 'ptr
+ by
+ 'char '*
+end rule
+
+rule boolVals1
+ replace [al_term]
+ 'true
+ by
+ '1
+end rule
+
+rule boolVals2
+ replace [al_term]
+ 'false
+ by
+ '0
+end rule
+
+function alStmtToC1 AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ VarDecl [al_variable_decl]
+ deconstruct VarDecl
+ Type [al_type_decl] Id [id] OptUnion [opt union]';
+ construct CType [c_type_decl]
+ Type
+ construct Result [c_variable_decl]
+ CType [boolTypes] [ptrTypes] Id OptUnion ';
+ replace [repeat c_lang_stmt]
+ by
+ Result
+end function
+
+function alExprExtendToC AlExprExtend [repeat al_expr_extend]
+ deconstruct AlExprExtend
+ Op [al_expr_op] Term [al_term] Rest [repeat al_expr_extend]
+ construct RestC [repeat c_expr_extend]
+ _ [alExprExtendToC Rest]
+ replace [repeat c_expr_extend]
+ by
+ Op Term RestC
+end function
+
+function alExprToC AlExpr [al_expr]
+ deconstruct AlExpr
+ ALTerm [al_term] AlExprExtend [repeat al_expr_extend]
+ construct CExprExtend [repeat c_expr_extend]
+ _ [alExprExtendToC AlExprExtend]
+ construct Result [opt c_expr]
+ ALTerm CExprExtend
+ replace [opt c_expr]
+ by
+ Result [boolVals1] [boolVals2]
+end function
+
+function alStmtToC2 AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ AlExpr [al_expr] ';
+ construct OptCExpr [opt c_expr]
+ _ [alExprToC AlExpr]
+ deconstruct OptCExpr
+ CExpr [c_expr]
+ replace [repeat c_lang_stmt]
+ by
+ CExpr ';
+end function
+
+function alOptElseC AlOptElse [opt al_else]
+ deconstruct AlOptElse
+ 'else
+ AlSubStmt [action_lang_stmt]
+ construct AlSubStmts [repeat action_lang_stmt]
+ AlSubStmt
+ construct CSubStmts [repeat c_lang_stmt]
+ _ [alToC AlSubStmts]
+ deconstruct CSubStmts
+ CSubStmt [c_lang_stmt]
+ replace [opt c_else]
+ by
+ 'else
+ CSubStmt
+end function
+
+function alStmtToC3 AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ 'if '( AlExpr [al_expr] ')
+ AlSubStmt [action_lang_stmt]
+ AlOptElse [opt al_else]
+ construct OptCExpr [opt c_expr]
+ _ [alExprToC AlExpr]
+ deconstruct OptCExpr
+ CExpr [c_expr]
+ construct AlSubStmts [repeat action_lang_stmt]
+ AlSubStmt
+ construct CSubStmts [repeat c_lang_stmt]
+ _ [alToC AlSubStmts]
+ deconstruct CSubStmts
+ CSubStmt [c_lang_stmt]
+ construct OptCElse [opt c_else]
+ _ [alOptElseC AlOptElse]
+ replace [repeat c_lang_stmt]
+ by
+ 'if '( CExpr ')
+ CSubStmt
+ OptCElse
+end function
+
+function alStmtToC4a AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ 'printi Id [id] ';
+ replace [repeat c_lang_stmt]
+ by
+ 'printf '( '"%i" ', Id ');
+end function
+
+function alStmtToC4b AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ 'prints String [stringlit] ';
+ replace [repeat c_lang_stmt]
+ by
+ 'fputs '( String , 'stdout ');
+end function
+
+function alStmtToC5 AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ '{ AlSubStmts [repeat action_lang_stmt] '}
+ construct CSubStmts [repeat c_lang_stmt]
+ _ [alToC AlSubStmts]
+ replace [repeat c_lang_stmt]
+ by
+ '{ CSubStmts '}
+end function
+
+function alStmtToC6 AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ RagelStmt [al_ragel_stmt]
+ replace [repeat c_lang_stmt]
+ by
+ RagelStmt
+end function
+
+function alToC AlStmts [repeat action_lang_stmt]
+ deconstruct AlStmts
+ FirstStmt [action_lang_stmt] Rest [repeat action_lang_stmt]
+ construct FirstC [repeat c_lang_stmt]
+ _
+ [alStmtToC1 FirstStmt]
+ [alStmtToC2 FirstStmt]
+ [alStmtToC3 FirstStmt]
+ [alStmtToC4a FirstStmt]
+ [alStmtToC4b FirstStmt]
+ [alStmtToC5 FirstStmt]
+ [alStmtToC6 FirstStmt]
+ construct RestC [repeat c_lang_stmt]
+ _ [alToC Rest]
+ replace [repeat c_lang_stmt]
+ by
+ FirstC [. RestC]
+end function
+
+rule actionTransC
+ replace [al_host_block]
+ '{ AlStmts [repeat action_lang_stmt] '}
+ construct CStmts [repeat c_lang_stmt]
+ _ [alToC AlStmts]
+ by
+ '{ CStmts '}
+end rule
+
+function langTransC
+ replace [program]
+ Definitions [repeat action_lang_stmt]
+ '%%
+ Initializations [repeat action_lang_stmt]
+ RagelDef [ragel_def]
+ construct CDefinitions [repeat c_lang_stmt]
+ _ [alToC Definitions]
+ construct CInitializations [repeat c_lang_stmt]
+ _ [alToC Initializations]
+ by
+ CDefinitions
+ '%%
+ CInitializations
+ RagelDef [actionTransC]
+end function
+
+function main
+ replace [program]
+ P [program]
+ by
+ P [langTransC]
+end function
diff --git a/test/langtrans_d.sh b/test/langtrans_d.sh
new file mode 100755
index 0000000..117e50a
--- /dev/null
+++ b/test/langtrans_d.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+#
+
+file=$1
+# Quoted: with no argument an unquoted test collapses to `[ -f ]`, which is true.
+[ -f "$file" ] || exit 1
+
+# Get the machine name.
+machine=`sed -n 's/^[\t ]*machine[\t ]*\([a-zA-Z_0-9]*\)[\t ]*;[\t ]*$/\1/p' $file`
+
+# Make a temporary version of the test case using the D language translations.
+sed -n '/\/\*/,/\*\//d;p' $file | txl -q stdin langtrans_d.txl > $file.pr
+
+# Begin writing out the test case.
+cat << EOF
+/*
+ * @LANG: d
+ * @GENERATED: yes
+ */
+import std.stdio;
+import std.string;
+
+class $machine
+{
+EOF
+
+# Write the data declarations
+sed -n '/^%%$/q;{s/^/\t/;p}' $file.pr
+
+# Write out the machine specification.
+sed -n '/^%%{$/,/^}%%/{s/^/\t/;p}' $file.pr
+
+# Write out the init and execute routines.
+cat << EOF
+ int cs;
+ %% write data;
+ void init()
+ {
+EOF
+
+sed -n '1,/^%%$/d; /^%%{$/q; {s/^/\t\t/;p}' $file.pr
+
+cat << EOF
+ %% write init;
+ }
+
+ void exec( char *data, int len )
+ {
+ char *p = data;
+ char *pe = data + len;
+ %% write exec;
+ }
+
+ void finish( )
+ {
+ %% write eof;
+ if ( cs >= ${machine}_first_final )
+ writefln( "ACCEPT" );
+ else
+ writefln( "FAIL" );
+ }
+
+EOF
+
+# Write out the test data.
+sed -n '1,/\/\* _____INPUT_____/d; /_____INPUT_____ \*\//q; p;' $file | awk '
+BEGIN {
+ print " char[][] inp = ["
+}
+{
+ print " " $0 ","
+}
+END {
+ print " ];"
+ print ""
+ print " int inplen = " NR ";"
+}'
+
+# Write out the main routine.
+cat << EOF
+}
+
+int main( )
+{
+ $machine m = new $machine();
+ int i;
+ for ( i = 0; i < m.inplen; i++ ) {
+ m.init();
+ m.exec( m.inp[i], m.inp[i].length );
+ m.finish();
+ }
+ return 0;
+}
+/* _____OUTPUT_____
+EOF
+
+# Write out the expected output.
+sed -n '1,/\/\* _____OUTPUT_____/d; /_____OUTPUT_____ \*\//q; p;' $file
+echo "*/"
+
+# Don't need this language-specific file anymore.
+rm $file.pr
diff --git a/test/langtrans_d.txl b/test/langtrans_d.txl
new file mode 100644
index 0000000..a9151f9
--- /dev/null
+++ b/test/langtrans_d.txl
@@ -0,0 +1,256 @@
+include "testcase.txl"
+
+define d_statements
+ [repeat d_lang_stmt]
+end define
+
+define d_lang_stmt
+ [al_ragel_stmt]
+ | [d_variable_decl]
+ | [d_expr_stmt]
+ | [d_if_stmt]
+ | [EX] '{ [IN] [NL] [d_statements] [EX] '} [IN] [NL]
+end define
+
+define d_variable_decl
+ [d_type_decl] [id] [opt union] '; [NL]
+end define
+
+define d_type_decl
+ [al_type_decl]
+ | 'char '*
+end define
+
+define d_expr_stmt
+ [d_expr] '; [NL]
+end define
+
+define d_expr
+ [d_term] [repeat d_expr_extend]
+end define
+
+define d_expr_extend
+ [al_expr_op] [d_term]
+end define
+
+define d_term
+ [al_term]
+ | [id] '( [d_args] ')
+end define
+
+define d_args
+ [list d_expr]
+end define
+
+define d_sign
+ '- | '+
+end define
+
+define d_if_stmt
+ 'if '( [d_expr] ') [NL] [IN]
+ [d_lang_stmt] [EX]
+ [opt d_else]
+end define
+
+define d_else
+ 'else [NL] [IN]
+ [d_lang_stmt] [EX]
+end define
+
+define d_lang
+ [d_statements]
+ '%% [NL]
+ [d_statements]
+ [ragel_def]
+end define
+
+define program
+ [lang_indep]
+ | [d_lang]
+end define
+
+redefine al_host_block
+ '{ [NL] [IN] [al_statements] [EX] '} [NL]
+ | '{ [NL] [IN] [d_statements] [EX] '} [NL]
+end define
+
+rule ptrTypes
+ replace [d_type_decl]
+ 'ptr
+ by
+ 'char '*
+end rule
+
+function alStmtToD1 AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ VarDecl [al_variable_decl]
+ deconstruct VarDecl
+ Type [al_type_decl] Id [id] OptUnion [opt union] ';
+ construct DType [d_type_decl]
+ Type
+ construct Result [d_variable_decl]
+ DType [ptrTypes] Id OptUnion ';
+ replace [repeat d_lang_stmt]
+ by
+ Result
+end function
+
+function alExprExtendToD AlExprExtend [repeat al_expr_extend]
+ deconstruct AlExprExtend
+ Op [al_expr_op] Term [al_term] Rest [repeat al_expr_extend]
+ construct DRest [repeat d_expr_extend]
+ _ [alExprExtendToD Rest]
+ replace [repeat d_expr_extend]
+ by
+ Op Term DRest
+end function
+
+function alExprToD AlExpr [al_expr]
+ deconstruct AlExpr
+ ALTerm [al_term] AlExprExtend [repeat al_expr_extend]
+ construct DExprExtend [repeat d_expr_extend]
+ _ [alExprExtendToD AlExprExtend]
+ construct Result [opt d_expr]
+ ALTerm DExprExtend
+ replace [opt d_expr]
+ by
+ Result
+end function
+
+function alStmtToD2 AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ AlExpr [al_expr] ';
+ construct OptDExpr [opt d_expr]
+ _ [alExprToD AlExpr]
+ deconstruct OptDExpr
+ DExpr [d_expr]
+ replace [repeat d_lang_stmt]
+ by
+ DExpr ';
+end function
+
+function alOptElseD AlOptElse [opt al_else]
+ deconstruct AlOptElse
+ 'else
+ AlSubStmt [action_lang_stmt]
+ construct AlSubStmts [repeat action_lang_stmt]
+ AlSubStmt
+ construct DSubStmts [repeat d_lang_stmt]
+ _ [alToD AlSubStmts]
+ deconstruct DSubStmts
+ DSubStmt [d_lang_stmt]
+ replace [opt d_else]
+ by
+ 'else
+ DSubStmt
+end function
+
+function alStmtToD3 AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ 'if '( AlExpr [al_expr] ')
+ AlSubStmt [action_lang_stmt]
+ AlOptElse [opt al_else]
+ construct OptDExpr [opt d_expr]
+ _ [alExprToD AlExpr]
+ deconstruct OptDExpr
+ DExpr [d_expr]
+ construct AlSubStmts [repeat action_lang_stmt]
+ AlSubStmt
+ construct DSubStmts [repeat d_lang_stmt]
+ _ [alToD AlSubStmts]
+ deconstruct DSubStmts
+ DSubStmt [d_lang_stmt]
+ construct OptDElse [opt d_else]
+ _ [alOptElseD AlOptElse]
+ replace [repeat d_lang_stmt]
+ by
+ 'if '( DExpr ')
+ DSubStmt
+ OptDElse
+end function
+
+function alStmtToD4a AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ 'printi Id [id] ';
+ replace [repeat d_lang_stmt]
+ by
+ 'writef '( '"%d" ', Id ');
+end function
+
+function alStmtToD4b AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ 'prints String [stringlit] ';
+ replace [repeat d_lang_stmt]
+ by
+ 'writef '( '"%s" ', String ');
+end function
+
+function alStmtToD5 AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ '{ AlSubStmts [repeat action_lang_stmt] '}
+ construct DSubStmts [repeat d_lang_stmt]
+ _ [alToD AlSubStmts]
+ replace [repeat d_lang_stmt]
+ by
+ '{ DSubStmts '}
+end function
+
+function alStmtToD6 AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ RagelStmt [al_ragel_stmt]
+ replace [repeat d_lang_stmt]
+ by
+ RagelStmt
+end function
+
+function alToD AlStmts [repeat action_lang_stmt]
+ deconstruct AlStmts
+ FirstStmt [action_lang_stmt] Rest [repeat action_lang_stmt]
+ construct DFirst [repeat d_lang_stmt]
+ _
+ [alStmtToD1 FirstStmt]
+ [alStmtToD2 FirstStmt]
+ [alStmtToD3 FirstStmt]
+ [alStmtToD4a FirstStmt]
+ [alStmtToD4b FirstStmt]
+ [alStmtToD5 FirstStmt]
+ [alStmtToD6 FirstStmt]
+ construct DRest [repeat d_lang_stmt]
+ _ [alToD Rest]
+ replace [repeat d_lang_stmt]
+ by
+ DFirst [. DRest]
+end function
+
+rule actionTransD
+ replace [al_host_block]
+ '{ AlStmts [repeat action_lang_stmt] '}
+ construct DStmts [repeat d_lang_stmt]
+ _ [alToD AlStmts]
+ by
+ '{ DStmts '}
+end rule
+
+function langTransD
+ replace [program]
+ Definitions [repeat action_lang_stmt]
+ '%%
+ Initializations [repeat action_lang_stmt]
+ RagelDef [ragel_def]
+ construct DDefinitions [repeat d_lang_stmt]
+ _ [alToD Definitions]
+ construct DInitializations [repeat d_lang_stmt]
+ _ [alToD Initializations]
+ by
+ DDefinitions
+ '%%
+ DInitializations
+ RagelDef [actionTransD]
+end function
+
+function main
+ replace [program]
+ P [program]
+ by
+ P [langTransD]
+end function
diff --git a/test/langtrans_java.sh b/test/langtrans_java.sh
new file mode 100755
index 0000000..65b6184
--- /dev/null
+++ b/test/langtrans_java.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+#
+
+file=$1
+# Quoted: with no argument an unquoted test collapses to `[ -f ]`, which is true.
+[ -f "$file" ] || exit 1
+root=${file%.rl}
+class=${root}_java
+
+# Make a temporary version of the test case using the Java language translations.
+sed -n '/\/\*/,/\*\//d;p' $file | txl -q stdin langtrans_java.txl - $class > $file.pr
+
+# Begin writing out the test case.
+cat << EOF
+/*
+ * @LANG: java
+ * @ALLOW_GENFLAGS: -T0
+ * @GENERATED: yes
+ */
+
+class $class
+{
+EOF
+
+# Write the data declarations
+sed -n '/^%%$/q;{s/^/\t/;p}' $file.pr
+
+# Write out the machine specification.
+sed -n '/^%%{$/,/^}%%/{s/^/\t/;p}' $file.pr
+
+# Write out the init and execute routines.
+cat << EOF
+
+ int cs;
+ %% write data;
+
+ void init()
+ {
+EOF
+
+sed -n '1,/^%%$/d; /^%%{$/q; {s/^/\t\t/;p}' $file.pr
+
+cat << EOF
+ %% write init;
+ }
+
+ void exec( char data[], int len )
+ {
+ int p = 0;
+ int pe = len;
+ %% write exec;
+ }
+
+ void finish( )
+ {
+ %% write eof;
+ if ( cs >= ${class}_first_final )
+ System.out.println( "ACCEPT" );
+ else
+ System.out.println( "FAIL" );
+ }
+
+EOF
+
+# Write out the test data.
+sed -n '1,/\/\* _____INPUT_____/d; /_____INPUT_____ \*\//q; p;' $file | awk '
+BEGIN {
+ print " static final String inp[] = {"
+}
+{
+ print " " $0 ","
+}
+END {
+ print " };"
+ print ""
+ print " static final int inplen = " NR ";"
+}'
+
+
+# Write out the main routine.
+cat << EOF
+
+ public static void main (String[] args)
+ {
+ $class machine = new $class();
+ for ( int i = 0; i < inplen; i++ ) {
+ machine.init();
+ machine.exec( inp[i].toCharArray(), inp[i].length() );
+ machine.finish();
+ }
+ }
+}
+
+EOF
+
+# Write out the expected output.
+sed -n '/\/\* _____OUTPUT_____/,/_____OUTPUT_____ \*\//p;' $file
+
+# Don't need this language-specific file anymore.
+rm $file.pr
diff --git a/test/langtrans_java.txl b/test/langtrans_java.txl
new file mode 100644
index 0000000..3f1755d
--- /dev/null
+++ b/test/langtrans_java.txl
@@ -0,0 +1,303 @@
+include "testcase.txl"
+
+keys
+ 'boolean 'new
+end keys
+
+
+define java_statements
+ [repeat java_lang_stmt]
+end define
+
+define java_lang_stmt
+ [al_ragel_stmt]
+ | [java_variable_decl]
+ | [java_expr_stmt]
+ | [java_if_stmt]
+ | [EX] '{ [IN] [NL] [java_statements] [EX] '} [IN] [NL]
+end define
+
+define java_variable_decl
+ [java_type_decl] [id] [opt union] '; [NL]
+end define
+
+define java_type_decl
+ [al_type_decl]
+ | 'boolean
+end define
+
+define java_expr_stmt
+ [java_expr] '; [NL]
+end define
+
+define java_expr
+ [java_term] [repeat java_expr_extend]
+end define
+
+define java_expr_extend
+ [al_expr_op] [java_term]
+end define
+
+define java_term
+ [al_term]
+ | [id] [repeat java_dot_id]
+ | [id] [repeat java_dot_id] '( [java_args] ')
+ | 'new [java_type_decl] [union]
+end define
+
+define java_dot_id
+ '. [id]
+end define
+
+define java_args
+ [list java_expr]
+end define
+
+define java_sign
+ '- | '+
+end define
+
+define java_if_stmt
+ 'if '( [java_expr] ') [NL] [IN]
+ [java_lang_stmt] [EX]
+ [opt java_else]
+end define
+
+define java_else
+ 'else [NL] [IN]
+ [java_lang_stmt] [EX]
+end define
+
+define java_lang
+ [java_statements]
+ '%% [NL]
+ [java_statements]
+ [ragel_def]
+end define
+
+define program
+ [lang_indep]
+ | [java_lang]
+end define
+
+redefine al_host_block
+ '{ [NL] [IN] [al_statements] [EX] '} [NL]
+ | '{ [NL] [IN] [java_statements] [EX] '} [NL]
+end define
+
+function clearUnion Type [java_type_decl] Id [id]
+ replace [opt union]
+ Union [union]
+ import ArrayInits [java_statements]
+ Stmts [repeat java_lang_stmt]
+ export ArrayInits
+ Id '= 'new Type Union '; Stmts
+ by
+ '[]
+end function
+
+rule boolTypes
+ replace [java_type_decl]
+ 'bool
+ by
+ 'boolean
+end rule
+
+rule ptrTypes
+ replace [al_type_decl]
+ 'ptr
+ by
+ 'int
+end rule
+
+function alStmtToJava1 AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ VarDecl [al_variable_decl]
+ deconstruct VarDecl
+ Type [al_type_decl] Id [id] OptUnion [opt union] ';
+ construct JavaType [java_type_decl]
+ Type
+ construct Result [java_variable_decl]
+ JavaType [boolTypes] [ptrTypes] Id OptUnion [clearUnion JavaType Id] ';
+ replace [repeat java_lang_stmt]
+ by
+ Result
+end function
+
+function alExprExtendToJava AlExprExtend [repeat al_expr_extend]
+ deconstruct AlExprExtend
+ Op [al_expr_op] Term [al_term] Rest [repeat al_expr_extend]
+ construct JavaRest [repeat java_expr_extend]
+ _ [alExprExtendToJava Rest]
+ replace [repeat java_expr_extend]
+ by
+ Op Term JavaRest
+end function
+
+function alExprToJava AlExpr [al_expr]
+ deconstruct AlExpr
+ ALTerm [al_term] AlExprExtend [repeat al_expr_extend]
+ construct JavaExprExtend [repeat java_expr_extend]
+ _ [alExprExtendToJava AlExprExtend]
+ construct Result [opt java_expr]
+ ALTerm JavaExprExtend
+ replace [opt java_expr]
+ by
+ Result
+end function
+
+function alStmtToJava2 AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ AlExpr [al_expr] ';
+ construct OptJavaExpr [opt java_expr]
+ _ [alExprToJava AlExpr]
+ deconstruct OptJavaExpr
+ JavaExpr [java_expr]
+ replace [repeat java_lang_stmt]
+ by
+ JavaExpr ';
+end function
+
+function alOptElseJava AlOptElse [opt al_else]
+ deconstruct AlOptElse
+ 'else
+ AlSubStmt [action_lang_stmt]
+ construct AlSubStmts [repeat action_lang_stmt]
+ AlSubStmt
+ construct JavaSubStmts [repeat java_lang_stmt]
+ _ [alToJava AlSubStmts]
+ deconstruct JavaSubStmts
+ JavaSubStmt [java_lang_stmt]
+ replace [opt java_else]
+ by
+ 'else
+ JavaSubStmt
+end function
+
+function alStmtToJava3 AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ 'if '( AlExpr [al_expr] ')
+ AlSubStmt [action_lang_stmt]
+ AlOptElse [opt al_else]
+ construct OptJavaExpr [opt java_expr]
+ _ [alExprToJava AlExpr]
+ deconstruct OptJavaExpr
+ JavaExpr [java_expr]
+ construct AlSubStmts [repeat action_lang_stmt]
+ AlSubStmt
+ construct JavaSubStmts [repeat java_lang_stmt]
+ _ [alToJava AlSubStmts]
+ deconstruct JavaSubStmts
+ JavaSubStmt [java_lang_stmt]
+ construct OptJavaElse [opt java_else]
+ _ [alOptElseJava AlOptElse]
+ replace [repeat java_lang_stmt]
+ by
+ 'if '( JavaExpr ')
+ JavaSubStmt
+ OptJavaElse
+end function
+
+function alStmtToJava4a AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ 'printi Id [id] ';
+ replace [repeat java_lang_stmt]
+ by
+ 'System '. 'out '. 'print '( Id ');
+end function
+
+function alStmtToJava4b AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ 'prints String [stringlit] ';
+ replace [repeat java_lang_stmt]
+ by
+ 'System '. 'out '. 'print '( String ');
+end function
+
+function alStmtToJava5 AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ '{ AlSubStmts [repeat action_lang_stmt] '}
+ construct JavaSubStmts [repeat java_lang_stmt]
+ _ [alToJava AlSubStmts]
+ replace [repeat java_lang_stmt]
+ by
+ '{ JavaSubStmts '}
+end function
+
+function alStmtToJava6 AlStmt [action_lang_stmt]
+ deconstruct AlStmt
+ RagelStmt [al_ragel_stmt]
+ replace [repeat java_lang_stmt]
+ by
+ RagelStmt
+end function
+
+
+function alToJava AlStmts [repeat action_lang_stmt]
+ deconstruct AlStmts
+ FirstStmt [action_lang_stmt] Rest [repeat action_lang_stmt]
+ construct JavaFirst [repeat java_lang_stmt]
+ _
+ [alStmtToJava1 FirstStmt]
+ [alStmtToJava2 FirstStmt]
+ [alStmtToJava3 FirstStmt]
+ [alStmtToJava4a FirstStmt]
+ [alStmtToJava4b FirstStmt]
+ [alStmtToJava5 FirstStmt]
+ [alStmtToJava6 FirstStmt]
+ construct JavaRest [repeat java_lang_stmt]
+ _ [alToJava Rest]
+ replace [repeat java_lang_stmt]
+ by
+ JavaFirst [. JavaRest]
+end function
+
+rule actionTransJava
+ replace [al_host_block]
+ '{ AlStmts [repeat action_lang_stmt] '}
+ construct JavaStmts [repeat java_lang_stmt]
+ _ [alToJava AlStmts]
+ by
+ '{ JavaStmts '}
+end rule
+
+rule machineName
+ replace $ [machine_stmt]
+ 'machine _ [id] ';
+ import TXLargs [repeat stringlit]
+ Arg1 [stringlit] _ [repeat stringlit]
+ construct ClassName [id]
+ _ [unquote Arg1]
+ by
+ 'machine ClassName ';
+end rule
+
+function langTransJava
+ replace [program]
+ Definitions [repeat action_lang_stmt]
+ '%%
+ Initializations [repeat action_lang_stmt]
+ RagelDef [ragel_def]
+ construct JavaDefinitions [repeat java_lang_stmt]
+ _ [alToJava Definitions]
+ construct JavaInitializations [repeat java_lang_stmt]
+ _ [alToJava Initializations]
+ construct NewRagelDef [ragel_def]
+ RagelDef [actionTransJava] [machineName]
+ import ArrayInits [java_statements]
+ ArrayInitStmts [repeat java_lang_stmt]
+ by
+ JavaDefinitions
+ '%%
+ ArrayInitStmts [. JavaInitializations]
+ NewRagelDef
+end function
+
+function main
+ replace [program]
+ P [program]
+ export ArrayInits [java_statements]
+ _
+ by
+ P [langTransJava]
+end function
diff --git a/test/lmgoto.rl b/test/lmgoto.rl
new file mode 100644
index 0000000..96c4392
--- /dev/null
+++ b/test/lmgoto.rl
@@ -0,0 +1,198 @@
+/*
+ * @LANG: c++
+ */
+
+#include <iostream>
+#include <string.h>
+using namespace std;
+
+#define TK_Dlit 192
+#define TK_Slit 193
+#define TK_Float 194
+#define TK_Id 195
+#define TK_NameSep 197
+#define TK_Arrow 211
+#define TK_PlusPlus 212
+#define TK_MinusMinus 213
+#define TK_ArrowStar 214
+#define TK_DotStar 215
+#define TK_ShiftLeft 216
+#define TK_ShiftRight 217
+#define TK_IntegerDecimal 218
+#define TK_IntegerOctal 219
+#define TK_IntegerHex 220
+#define TK_EqualsEquals 223
+#define TK_NotEquals 224
+#define TK_AndAnd 225
+#define TK_OrOr 226
+#define TK_MultAssign 227
+#define TK_DivAssign 228
+#define TK_PercentAssign 229
+#define TK_PlusAssign 230
+#define TK_MinusAssign 231
+#define TK_AmpAssign 232
+#define TK_CaretAssign 233
+#define TK_BarAssign 234
+#define TK_DotDotDot 240
+#define TK_Whitespace 241
+#define TK_Comment 242
+
+struct Scanner
+{
+ int cs, act; // cs is the current state; run() compares it against Scanner_error
+ char *tokstart, *tokend; // bounds of the current token; token() prints [tokstart, tokend)
+ bool isCxx; // set by the main scanner: true after '//', false after '/*'
+
+ void token( int tok ); // print the token id followed by the token text
+ void run( char *buf ); // scan the NUL-terminated string buf
+};
+
+
+%%{
+ machine Scanner;
+
+ # Process comment bodies. Relies on isCxx being set by the main scanner: a '//' comment (isCxx true) ends at '\n', a '/*' comment (isCxx false) ends at '*/'; everything else is echoed.
+ comment := |*
+ '*/' {
+ if ( ! isCxx )
+ fgoto main;
+ else {
+ cout << "comm char: " << tokstart[0] << endl;
+ cout << "comm char: " << tokstart[1] << endl;
+ }
+ };
+
+ '\n' {
+ if ( isCxx )
+ fgoto main;
+ else
+ cout << "comm char: " << tokstart[0] << endl;
+ };
+
+ any {
+ cout << "comm char: " << tokstart[0] << endl;
+ };
+ *|;
+
+ main := |*
+
+ # Single and double literals.
+ ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) { token( TK_Slit );};
+ ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) { token( TK_Dlit );};
+
+ # Identifiers
+ ( [a-zA-Z_] [a-zA-Z0-9_]* ) { token( TK_Id ); };
+
+ # Floating literals.
+ fract_const = digit* '.' digit+ | digit+ '.';
+ exponent = [eE] [+\-]? digit+;
+ float_suffix = [flFL];
+
+ ( fract_const exponent? float_suffix? |
+ digit+ exponent float_suffix? ) { token( TK_Float );};
+
+ # Integer decimal. Leading part buffered by float.
+ ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) { token( TK_IntegerDecimal );};
+
+ # Integer octal. Leading part buffered by float.
+ ( '0' [0-9]+ [ulUL]{0,2} ) { token( TK_IntegerOctal );};
+
+ # Integer hex. Leading 0 buffered by float.
+ ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) ) { token( TK_IntegerHex );};
+
+ # Only buffer the second item, first buffered by symbol.
+ '::' {token( TK_NameSep );};
+ '==' {token( TK_EqualsEquals );};
+ '!=' {token( TK_NotEquals );};
+ '&&' {token( TK_AndAnd );};
+ '||' {token( TK_OrOr );};
+ '*=' {token( TK_MultAssign );};
+ '/=' {token( TK_DivAssign );};
+ '%=' {token( TK_PercentAssign );};
+ '+=' {token( TK_PlusAssign );};
+ '-=' {token( TK_MinusAssign );};
+ '&=' {token( TK_AmpAssign );};
+ '^=' {token( TK_CaretAssign );};
+ '|=' {token( TK_BarAssign );};
+ '++' {token( TK_PlusPlus );};
+ '--' {token( TK_MinusMinus );};
+ '->' {token( TK_Arrow );};
+ '->*' {token( TK_ArrowStar );};
+ '.*' {token( TK_DotStar );};
+
+ # Three char compounds, first item already buffered.
+ '...' { token( TK_DotDotDot );};
+
+ # Single char symbols.
+ ( punct - [_"'] ) { token( tokstart[0] );};
+
+ # Comments and whitespace. Handle these outside of the machine so that we
+ # don't end up buffering the comments.
+ '/*' { isCxx = false; fgoto comment; };
+ '//' { isCxx = true; fgoto comment; };
+
+ ( any - 33..126 )+ { token( TK_Whitespace );};
+
+ *|;
+}%%
+
+%% write data nofinal;
+
+void Scanner::token( int tok )
+{
+	/* Print the numeric token id in angle brackets, then the matched
+	 * text [tokstart, tokend), then a newline. */
+	cout << "<" << tok << "> ";
+	if ( tokstart != 0 ) {
+		const char *stop = tokstart + ( tokend - tokstart );
+		for ( const char *ch = tokstart; ch < stop; ch++ )
+			cout << *ch;
+	}
+	cout << '\n';
+}
+
+void Scanner::run( char *buf )
+{ // Scan the NUL-terminated string buf with the generated Scanner machine.
+ int len = strlen( buf );
+ %% write init;
+ char *p = buf; // current input position used by the generated code
+ char *pe = buf + len; // one past the last input character
+ %% write exec;
+
+ if ( cs == Scanner_error ) {
+ /* Machine failed before finding a token. */
+ cout << "PARSE ERROR" << endl;
+ }
+ %% write eof;
+}
+
+int main()
+{
+ Scanner scanner;
+ scanner.run(
+ "//hello*/\n"
+ "/*hi there*/ hello 0x88\n"
+ );
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+comm char: h
+comm char: e
+comm char: l
+comm char: l
+comm char: o
+comm char: *
+comm char: /
+comm char: h
+comm char: i
+comm char:
+comm char: t
+comm char: h
+comm char: e
+comm char: r
+comm char: e
+<241>
+<195> hello
+<241>
+<220> 0x88
+#endif
diff --git a/test/mailbox1.h b/test/mailbox1.h
new file mode 100644
index 0000000..bf9a87e
--- /dev/null
+++ b/test/mailbox1.h
@@ -0,0 +1,33 @@
+#ifndef _MAILBOX1_H
+#define _MAILBOX1_H
+
+#include <stdio.h>
+#include <string.h>
+#include "vector.h"
+
+struct MBox
+{
+ int cs; // current state of the machine; inspected by finish()
+
+ Vector<char> headName; // buffer for the name of the header being parsed
+ Vector<char> headContent; // buffer for the content of the header being parsed
+
+ // Initialize the machine by running the generated init code. Returns
+ // nothing; call finish() to find out whether the machine is in an
+ // accepting state.
+ void init( );
+
+ // Execute the machine on a block of data of len bytes. Returns nothing;
+ // the resulting state is left in cs. Call finish() afterwards to find
+ // out whether the machine is in the error state, a non-accepting state
+ // or an accepting state.
+ void execute( char *data, int len );
+
+ // Indicate that there is no more data. Returns -1 if the machine finishes
+ // in the error state and does not accept, 0 if the machine finishes
+ // in any other non-accepting state and 1 if the machine finishes in an
+ // accepting state.
+ int finish( );
+};
+
+#endif
diff --git a/test/mailbox1.rl b/test/mailbox1.rl
new file mode 100644
index 0000000..89e8775
--- /dev/null
+++ b/test/mailbox1.rl
@@ -0,0 +1,252 @@
+/*
+ * @LANG: c++
+ * @CFLAGS: -I../aapl
+ * @ALLOW_GENFLAGS: -T0 -T1 -F0 -F1 -G0 -G1 -G2 -P
+ */
+
+/*
+ * Parses unix mail boxes into headers and bodies.
+ */
+
+#include "mailbox1.h"
+
+%%{
+ machine MBox;
+
+ # Buffer the header names.
+ action bufHeadName { fsm->headName.append(fc); }
+
+ # Buffer the header content.
+ action bufHeadContent { fsm->headContent.append(fc); }
+
+ # Terminate a header. If it is an interesting header then prints it.
+ action finBufHeadContent {
+ /* Terminate the buffers. */
+ fsm->headName.append(0);
+ fsm->headContent.append(0);
+
+ /* Print the header. Interesting headers. */
+ printf("%s:%s\n", fsm->headName.data, fsm->headContent.data);
+
+ /* Clear for the next time we use them. */
+ fsm->headName.empty();
+ fsm->headContent.empty();
+ }
+
+ action msgstart{
+ printf("NEW MESSAGE\n");
+ }
+
+ # Prints a blank line after the end of the headers of each message.
+ action blankLine {
+ printf("\n");
+ }
+
+ # Helpers we will use in matching the date section of the from line.
+ day = /[A-Z][a-z][a-z]/;
+ month = /[A-Z][a-z][a-z]/;
+ year = /[0-9][0-9][0-9][0-9]/;
+ time = /[0-9][0-9]:[0-9][0-9]/ . ( /:[0-9][0-9]/ | '' );
+ letterZone = /[A-Z][A-Z][A-Z]/;
+ numZone = /[+\-][0-9][0-9][0-9][0-9]/;
+ zone = letterZone | numZone;
+ dayNum = /[0-9 ][0-9]/;
+
+ # These are the different formats of the date minus an obscure
+ # type that has a funny string 'remote from xxx' on the end. Taken
+ # from c-client in the imap-2000 distribution.
+ date = day . ' ' . month . ' ' . dayNum . ' ' . time . ' ' .
+ ( year | year . ' ' . zone | zone . ' ' . year );
+
+ # Note the priority assignment on the end of the from line. While we are
+ # matching the body of a message we may enter into this machine. We will
+ # not leave the body of the previous message until this entire from line is
+ # matched.
+ fromLine = 'From ' . /[^\n]/* . ' ' . date . '\n' @(new_msg,1) @msgstart;
+
+ # The types of characters that can be used as a header name.
+ hchar = print - [ :];
+
+ header =
+ # The name of the header.
+ hchar+ $bufHeadName . ':'
+ # The content of the header. Look out for continuations.
+ . ( (extend - '\n') $bufHeadContent | '\n'. [ \t] @bufHeadContent )*
+ # Buffer must end with a newline that does not continue.
+ . '\n' %finBufHeadContent;
+
+ messageLine = ( extend - '\n' )* . '\n' @(new_msg, 0);
+
+ # When we get to the last newline we are still matching messageLine
+ # so on the last newline it will think we are still in the message.
+ # We need this because we can't assume that every newline means
+ # the end of the current message, whereas at the same time we require
+ # that there be a newline before the fromLine of the next message.
+ message = ( fromLine . header* . '\n' @blankLine . messageLine* . '\n' );
+
+ # It's important that the priority in the fromLine gets bumped up
+ # so that we are able to move to new messages. Otherwise we
+ # will always stay in the message body of the first message.
+ main := message*;
+}%%
+
+%% write data;
+
+void MBox::init( )
+{ // Put the machine into its initial state via the generated init code.
+ MBox *fsm = this; // NOTE(review): fsm is only needed here if the generated init code uses the "access fsm->;" prefix declared in execute() — confirm
+ %% write init;
+}
+
+void MBox::execute( char *data, int len )
+{ // Run the machine over len bytes starting at data.
+ MBox *fsm = this; // the actions and the access directive below reach the members through fsm->
+ char *p = data;
+ char *pe = data + len; // one past the last byte to process
+ %%{
+ access fsm->;
+ write exec;
+ }%%
+}
+
+int MBox::finish( )
+{
+	/* Classify the current state: -1 for the error state, 1 for a
+	 * final (accepting) state, 0 for anything else. */
+	if ( cs == MBox_error )
+		return -1;
+	return ( cs >= MBox_first_final ) ? 1 : 0;
+}
+
+MBox mbox;
+
+void test( char *buf )
+{
+	/* Feed the whole NUL-terminated buffer to the global mbox machine
+	 * and report whether it ended in an accepting state. */
+	int len = strlen( buf );
+	mbox.init();
+	mbox.execute( buf, len );
+
+	const bool accepted = mbox.finish() > 0;
+	if ( !accepted ) {
+		printf("FAIL\n");
+		return;
+	}
+	printf("ACCEPT\n");
+}
+
+
+int main()
+{
+ test(
+ "From email address goes here Wed Nov 28 13:30:05 2001 -0500\n"
+ "Header1: this is the header contents\n"
+ " there is more on the second line\n"
+ " and more on the third line.\n"
+ "Header2: slkdj\n"
+ "\n"
+ "This is the message data\n"
+ "\n"
+ "From email Wed Nov 28 13:30:05 2001 -0500\n"
+ "Header: \n"
+ "\n"
+ "mail message\n"
+ "\n"
+ );
+
+ test(
+ "From user@host.dom Wed Nov 28 13:30:05 2001\n"
+ "\n"
+ "There are no headers. \n"
+ "\n"
+ "From email Wed Nov 28 13:30:05 EST 2000\n"
+ "\n"
+ "There are no headers.\n"
+ "\n"
+ );
+
+ test(
+ "From user@host.dom Wed Nov 28 13:30:05 2001\n"
+ "Header:alsdj\n"
+ "\n"
+ "Header:\n"
+ "salkfj\n"
+ "\n"
+ "There are no headers. \n"
+ "\n"
+ );
+
+ test(
+ "From user@host.dom Wed Nov 28 13:30:05 2001\n"
+ "Header:alsdj\n"
+ "\n"
+ "Header:\n"
+ "salkfj\n"
+ "\n"
+ "There are no headers. \n"
+ "\n"
+ ">From user@host.dom Wed Nov 28 13:30:05 2001\n"
+ "\n"
+ );
+
+ test(
+ "From user@host.dom Wed Nov 28 13:30:05 2001\n"
+ "Header:alsdj\n"
+ "\n"
+ "Header:\n"
+ "salkfj\n"
+ "\n"
+ "There are no headers. \n"
+ "\n"
+ "From user@host.dom Wed Nov 28 13:30:05 2001\n"
+ "\n"
+ );
+
+ test(
+ "From user@host.dom Wed Nov 28 13:30:05 2001\n"
+ "Header:alsdj\n"
+ "\n"
+ "Header:\n"
+ "salkfj\n"
+ "\n"
+ "There are no headers. \n"
+ "\n"
+ "From user@host.dom Wed Nov 28 13:30:05 2001\n"
+ "\n"
+ "\n"
+ );
+
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+NEW MESSAGE
+Header1: this is the header contents there is more on the second line and more on the third line.
+Header2: slkdj
+
+NEW MESSAGE
+Header:
+
+ACCEPT
+NEW MESSAGE
+
+NEW MESSAGE
+
+ACCEPT
+NEW MESSAGE
+Header:alsdj
+
+ACCEPT
+NEW MESSAGE
+Header:alsdj
+
+ACCEPT
+NEW MESSAGE
+Header:alsdj
+
+NEW MESSAGE
+
+FAIL
+NEW MESSAGE
+Header:alsdj
+
+NEW MESSAGE
+
+ACCEPT
+#endif
diff --git a/test/mailbox2.rl b/test/mailbox2.rl
new file mode 100644
index 0000000..d84696d
--- /dev/null
+++ b/test/mailbox2.rl
@@ -0,0 +1,173 @@
+/*
+ * @LANG: c++
+ * @CFLAGS: -I../aapl
+ */
+
+#include <iostream>
+#include <string.h>
+
+using std::cin;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+%%{
+ machine mailbox;
+
+ action prn_char { cout << *p; }
+ action prn_space { cout << ' '; }
+ action prn_word { cout.write(ws, p-ws); cout << ' '; }
+ action prn_addr1 { cout << "| "; cout.write(ws+1, p-ws-2); }
+ action prn_addr2 { cout << "| "; cout.write(ws, p-ws); }
+ action prn_tab { cout << '\t'; }
+ action prn_nl { cout << '\n'; }
+ action prn_separator { cout << "------\n"; }
+ action prn_from { cout << "FROM\n"; }
+ action prn_to { cout << "TO\n"; }
+ action prn_subj { cout << "SUBJECT\n"; }
+
+ action start_word { ws = p; }
+ action start_headers { preserve = p; }
+ action end_headers {preserve = 0;}
+
+ day = upper lower{2};
+ month = upper lower{2};
+ year = digit{4};
+ time = digit{2} ':' digit{2}
+ ( ':' digit{2} )?;
+ letterZone = upper{3};
+ numZone = [+\-] digit{4};
+ zone = letterZone | numZone;
+ dayNum = ( digit | ' ' ) digit;
+
+ date = day ' ' month ' '
+ dayNum ' ' time ' '
+ (
+ year |
+ year ' ' zone |
+ zone ' ' year
+ );
+
+ fromLine = 'From ' [^\n]* ' '
+ date '\n' @start_headers;
+
+ headerChar = print - [ :];
+ headersToPrint = 'From' |
+ 'To' | 'Subject';
+ headersToConsume =
+ headerChar+ - headersToPrint;
+
+ consumeHeader =
+ headersToConsume ':'
+ (
+ [^\n] |
+ ( '\n' [ \t] )
+ )*
+ '\n';
+
+ addrWS = ( [ \t]+ | '\n' [ \t]+ );
+ addrComment = '(' [^)]* ')';
+ addrWord = [^"'@,<>() \t\n]+;
+ addrAddr1 = '<' [^>]* '>';
+ addrAddr2 = addrWord '@' addrWord;
+ addrString =
+ '"' [^"]* '"' |
+ "'" [^']* "'";
+
+ addrItem = (
+ addrAddr1 %prn_addr1 |
+ addrAddr2 %prn_addr2 |
+ addrWord %prn_word |
+ addrString %prn_word
+ ) >start_word;
+
+ address = (
+ addrWS |
+ addrComment |
+ addrItem
+ )** >prn_tab;
+
+ addrHeader = (
+ 'From' %prn_from |
+ 'To' %prn_to
+ ) ':'
+ address ( ',' @prn_nl address )*
+ '\n' %prn_nl;
+
+ subjectHeader =
+ 'Subject:' @prn_subj @prn_tab
+ ' '* <:
+ (
+ [^\n] @prn_char |
+ ( '\n' [ \t]+ ) %prn_space
+ )**
+ '\n' %prn_nl;
+
+ header = consumeHeader |
+ addrHeader | subjectHeader;
+
+ messageLine =
+ ( [^\n]* '\n' - fromLine );
+
+ main := (
+ fromLine %prn_separator
+ header*
+ '\n' @end_headers
+ messageLine*
+ )*;
+ }%%
+
+%% write data;
+
+#define BUFSIZE 8192
+
+void test( char *buf )
+{ // Run the mailbox machine over the NUL-terminated string buf; problems are reported on cerr.
+ int cs, len = strlen( buf );
+ char *preserve = 0, *ws = 0; // preserve: written by start_headers/end_headers; ws: start of the current address item (start_word)
+
+ %% write init;
+ char *p = buf;
+ char *pe = p + len; // one past the last input character
+ %% write exec;
+
+ if ( cs == mailbox_error )
+ cerr << "ERROR" << endl;
+
+ if ( cs < mailbox_first_final ) // the machine stopped outside its final states
+ cerr << "DID NOT FINISH IN A FINAL STATE" << endl;
+}
+
+int main()
+{
+ test(
+ "From user@host.com Wed Nov 28 13:30:05 2001\n"
+ "From: \"Adrian D. Thurston\" <thurston@cs.queensu.ca>\n"
+ "Subject: the squirrel has landed\n"
+ "\n"
+ "Message goes here. \n"
+ "From (trick from line).\n"
+ "From: not really a header\n"
+ "\n"
+ "From user2@host2.com Wed Nov 28 13:30:05 2001\n"
+ "To: Edgar Allen Poe <ep@net.com> (da man)\n"
+ "Subject: (no subject) \n"
+ "\n"
+ "Message goes here. \n"
+ "\n"
+ );
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+------
+FROM
+ "Adrian D. Thurston" | thurston@cs.queensu.ca
+SUBJECT
+ the squirrel has landed
+------
+TO
+ Edgar Allen Poe | ep@net.com
+SUBJECT
+ (no subject)
+#endif
diff --git a/test/mailbox3.rl b/test/mailbox3.rl
new file mode 100644
index 0000000..e8089bb
--- /dev/null
+++ b/test/mailbox3.rl
@@ -0,0 +1,247 @@
+/*
+ * @LANG: c++
+ * @CFLAGS: -I../aapl
+ */
+
+#include <iostream>
+#include <string.h>
+
+using std::cin;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+%%{
+ machine mailbox;
+
+ action prn_char { cout << *p; }
+ action prn_space { cout << ' '; }
+ action prn_word { cout.write(ws, p-ws); cout << ' '; }
+ action prn_addr1 { cout << "| "; cout.write(ws+1, p-ws-2); }
+ action prn_addr2 { cout << "| "; cout.write(ws, p-ws); }
+ action prn_tab { cout << '\t'; }
+ action prn_nl { cout << '\n'; }
+ action prn_separator { cout << "------\n"; }
+ action prn_from { cout << "FROM\n"; }
+ action prn_to { cout << "TO\n"; }
+ action prn_subj { cout << "SUBJECT\n"; }
+
+ action start_word { ws = p; }
+ action start_headers { preserve = p; }
+ action end_headers {preserve = 0;}
+
+ day = upper lower{2};
+ month = upper lower{2};
+ year = digit{4};
+ time = digit{2} ':' digit{2}
+ ( ':' digit{2} )?;
+ letterZone = upper{3};
+ numZone = [+\-] digit{4};
+ zone = letterZone | numZone;
+ dayNum = ( digit | ' ' ) digit;
+
+ date = day ' ' month ' '
+ dayNum ' ' time ' '
+ (
+ year |
+ year ' ' zone |
+ zone ' ' year
+ );
+
+ fromLine = 'From ' [^\n]* ' '
+ date '\n' @start_headers;
+
+ headerChar = print - [ :];
+ headersToPrint = 'From' |
+ 'To' | 'Subject';
+ headersToConsume =
+ headerChar+ - headersToPrint;
+
+ action init_hlen {hlen = 0;}
+ action hlen {hlen++ < 50}
+
+ consumeHeaderBody =
+ ':' @init_hlen
+ (
+ [^\n] |
+ ( '\n' [ \t] )
+ )* when hlen
+ '\n';
+
+ consumeHeader =
+ headersToConsume consumeHeaderBody;
+
+ addrWS = ( [ \t]+ | '\n' [ \t]+ );
+ addrComment = '(' [^)]* ')';
+ addrWord = [^"'@,<>() \t\n]+;
+ addrAddr1 = '<' [^>]* '>';
+ addrAddr2 = addrWord '@' addrWord;
+ addrString =
+ '"' [^"]* '"' |
+ "'" [^']* "'";
+
+ addrItem = (
+ addrAddr1 %prn_addr1 |
+ addrAddr2 %prn_addr2 |
+ addrWord %prn_word |
+ addrString %prn_word
+ ) >start_word;
+
+ address = (
+ addrWS |
+ addrComment |
+ addrItem
+ )** >prn_tab;
+
+ addrHeader = (
+ 'From' %prn_from |
+ 'To' %prn_to
+ ) ':' @init_hlen
+ ( address ( ',' @prn_nl address )* ) when hlen
+ '\n' %prn_nl;
+
+ subjectHeader =
+ 'Subject:' @prn_subj @prn_tab @init_hlen
+ (
+ ' '* <:
+ (
+ [^\n] @prn_char |
+ ( '\n' [ \t]+ ) %prn_space
+ )**
+ ) when hlen
+ '\n' %prn_nl;
+
+ header = consumeHeader |
+ addrHeader | subjectHeader;
+
+ messageLine =
+ ( [^\n]* when hlen '\n' @init_hlen ) - fromLine;
+
+ main := (
+ fromLine %prn_separator
+ header*
+ '\n' @end_headers @init_hlen
+ messageLine*
+ )*;
+ }%%
+
+%% write data;
+
+#define BUFSIZE 8192
+
+void test( char *buf )
+{ // Run the mailbox machine over the NUL-terminated string buf; a non-final end state is reported on cout.
+ int cs, len = strlen( buf );
+ char *preserve = 0, *ws = 0; // preserve: written by start_headers/end_headers; ws: start of the current address item (start_word)
+ int hlen = 0; // counter reset by init_hlen and tested by the hlen condition (hlen++ < 50)
+
+ %% write init;
+ char *p = buf;
+ char *pe = p + len; // one past the last input character
+ %% write exec;
+
+ if ( cs < mailbox_first_final ) { // the machine stopped outside its final states
+ cout << endl << endl;
+ cout << "DID NOT FINISH IN A FINAL STATE" << endl;
+ }
+}
+
+int main()
+{
+ test(
+ "From user@host.com Wed Nov 28 13:30:05 2001\n"
+ "From: \"Adrian D. Thurston\" <thurston@cs.queensu.ca>\n"
+ "Subject: the squirrel has landed\n"
+ "\n"
+ "Message goes here. \n"
+ "From (trick from line).\n"
+ "From: not really a header\n"
+ "\n"
+ "From user2@host2.com Wed Nov 28 13:30:05 2001\n"
+ "To: \"(kill 1)\" Edgar Allen Poe <ep@net.com> (da man)\n"
+ "Subject: (no subject) this is a really long subject which should fail the length constraint \n"
+ "Other: 0123456789\n"
+ "\n"
+ "Message goes here. \n"
+ "\n"
+ );
+ test(
+ "From user@host.com Wed Nov 28 13:30:05 2001\n"
+ "To: \"(kill 2)\" some guy <sg@net.com>\n"
+ "From: \"Adrian D. Thurston this name is far too long\" <thurston@cs.queensu.ca>\n"
+ "Subject: the squirrel has landed\n"
+ "\n"
+ "From user2@host2.com Wed Nov 28 13:30:05 2001\n"
+ "To: Edgar Allen Poe <ep@net.com> (da man)\n"
+ "Subject: (no subject) \n"
+ "\n"
+ );
+ test(
+ "From user@host.com Wed Nov 28 13:30:05 2001\n"
+ "To: \"(kill 3)\" some guy <sg@net.com>\n"
+ "From: \"Adrian D. Thurston This name is fore sure absolutely too long\" <t@cs.ca>\n"
+ "Subject: the squirrel has landed\n"
+ "\n"
+ );
+ test(
+ "From user@host.com Wed Nov 28 13:30:05 2001\n"
+ "From: \"Adrian D. Thurston \" <t@cs.ca>\n"
+ "Subject: (kill 4) the squirrel has landed\n"
+ "Other: This is another header field, not interpreted, that is too long\n"
+ "\n"
+ );
+ test(
+ "From user@host.com Wed Nov 28 13:30:05 2001\n"
+ "From: \"Adrian D. Thurston \" <t@cs.ca>\n"
+ "Subject: (kill 5)the squirrel has landed\n"
+ "\n"
+ "This message line is okay.\n"
+ "But this message line is far too long and will cause an error.\n"
+ );
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+------
+FROM
+ "Adrian D. Thurston" | thurston@cs.queensu.ca
+SUBJECT
+ the squirrel has landed
+------
+TO
+ "(kill 1)" Edgar Allen Poe | ep@net.com
+SUBJECT
+ (no subject) this is a really long subject whic
+
+DID NOT FINISH IN A FINAL STATE
+------
+TO
+ "(kill 2)" some guy | sg@net.com
+FROM
+ "Adrian D. Thurston this name is far too long"
+
+DID NOT FINISH IN A FINAL STATE
+------
+TO
+ "(kill 3)" some guy | sg@net.com
+FROM
+
+
+DID NOT FINISH IN A FINAL STATE
+------
+FROM
+ "Adrian D. Thurston " | t@cs.ca
+SUBJECT
+ (kill 4) the squirrel has landed
+
+
+DID NOT FINISH IN A FINAL STATE
+------
+FROM
+ "Adrian D. Thurston " | t@cs.ca
+SUBJECT
+ (kill 5)the squirrel has landed
+
+
+DID NOT FINISH IN A FINAL STATE
+#endif
diff --git a/test/minimize1.rl b/test/minimize1.rl
new file mode 100644
index 0000000..d7c6ef4
--- /dev/null
+++ b/test/minimize1.rl
@@ -0,0 +1,83 @@
+/*
+ * @LANG: c
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+struct min
+{
+ int cs;
+};
+
+%%{
+ machine min;
+ variable curstate fsm->cs;
+
+ action a_or_b { printf("a or b\n"); }
+
+ main := (
+ ( 'a' . [ab]* @a_or_b ) |
+ ( 'b' . [ab]* @a_or_b )
+ ) . '\n';
+}%%
+
+%% write data;
+
+void min_init( struct min *fsm )
+{
+ %% write init;
+}
+
+void min_execute( struct min *fsm, const char *_data, int _len )
+{
+ const char *p = _data;
+ const char *pe = _data+_len;
+
+ %% write exec;
+}
+
+int min_finish( struct min *fsm )
+{ /* Runs the generated eof code, then returns -1 for the error state, 1 for a final state, 0 otherwise. */
+ %% write eof;
+
+ if ( fsm->cs == min_error )
+ return -1;
+ if ( fsm->cs >= min_first_final )
+ return 1;
+ return 0;
+}
+
+struct min fsm;
+
+void test( char *buf )
+{
+	/* Run one NUL-terminated input through the global machine, starting
+	 * from a fresh initial state, and print the verdict. */
+	int len = strlen( buf );
+	int verdict;
+
+	min_init( &fsm );
+	min_execute( &fsm, buf, len );
+	verdict = min_finish( &fsm );
+
+	if ( verdict <= 0 )
+		printf("FAIL\n");
+	else
+		printf("ACCEPT\n");
+}
+
+
+int main()
+{
+ test( "aaaaaa\n" );
+ test( "a\n" );
+ test( "abc\n" );
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+a or b
+a or b
+a or b
+a or b
+a or b
+ACCEPT
+ACCEPT
+a or b
+FAIL
+#endif
diff --git a/test/patact.rl b/test/patact.rl
new file mode 100644
index 0000000..c15d93d
--- /dev/null
+++ b/test/patact.rl
@@ -0,0 +1,91 @@
+/*
+ * @LANG: indep
+ */
+
+char comm;
+int top;
+int stack[32];
+ptr tokstart;
+ptr tokend;
+int act;
+int val;
+%%
+%%{
+ machine patact;
+
+ other := |*
+ [a-z]+ => { prints "word\n"; };
+ [0-9]+ => { prints "num\n"; };
+ [\n ] => { prints "space\n"; };
+ *|;
+
+ exec_test := |*
+ [a-z]+ => { prints "word (w/lbh)\n"; fexec tokend-1; fgoto other; };
+ [a-z]+ ' foil' => { prints "word (c/lbh)\n"; };
+ [\n ] => { prints "space\n"; };
+ '22' => { prints "num (w/switch)\n"; };
+ [0-9]+ => { prints "num (w/switch)\n"; fexec tokend-1; fgoto other;};
+ [0-9]+ ' foil' => {prints "num (c/switch)\n"; };
+ '!';# => { prints "immdiate\n"; fgoto exec_test; };
+ *|;
+
+ main := |*
+ [a-z]+ => { prints "word (w/lbh)\n"; fhold; fgoto other; };
+ [a-z]+ ' foil' => { prints "word (c/lbh)\n"; };
+ [\n ] => { prints "space\n"; };
+ '22' => { prints "num (w/switch)\n"; };
+ [0-9]+ => { prints "num (w/switch)\n"; fhold; fgoto other;};
+ [0-9]+ ' foil' => {prints "num (c/switch)\n"; };
+ '!' => { prints "immdiate\n"; fgoto exec_test; };
+ *|;
+}%%
+/* _____INPUT_____
+"abcd foix\n"
+"abcd\nanother\n"
+"123 foix\n"
+"!abcd foix\n"
+"!abcd\nanother\n"
+"!123 foix\n"
+_____INPUT_____ */
+/* _____OUTPUT_____
+word (w/lbh)
+word
+space
+word
+space
+ACCEPT
+word (w/lbh)
+word
+space
+word
+space
+ACCEPT
+num (w/switch)
+num
+space
+word
+space
+ACCEPT
+immdiate
+word (w/lbh)
+word
+space
+word
+space
+ACCEPT
+immdiate
+word (w/lbh)
+word
+space
+word
+space
+ACCEPT
+immdiate
+num (w/switch)
+num
+space
+word
+space
+ACCEPT
+_____OUTPUT_____ */
+
diff --git a/test/range.rl b/test/range.rl
new file mode 100644
index 0000000..34bc430
--- /dev/null
+++ b/test/range.rl
@@ -0,0 +1,76 @@
+/*
+ * @LANG: c
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+struct range
+{
+ int cs;
+};
+
+%%{
+ machine range_fsm;
+ variable curstate fsm->cs;
+
+ main := ( 'a' .. 'c' | 'c' .. 'e' | 'm' .. 'n' | 'a' .. 'z' ) '\n';
+}%%
+
+%% write data;
+
+void range_init( struct range *fsm )
+{
+ %% write init;
+}
+
+void range_execute( struct range *fsm, const char *_data, int _len )
+{
+ const char *p = _data;
+ const char *pe = _data+_len;
+
+ %% write exec;
+}
+
+int range_finish( struct range *fsm )
+{ /* Runs the generated eof code, then returns -1 for the error state, 1 for a final state, 0 otherwise. */
+ %% write eof;
+
+ if ( fsm->cs == range_fsm_error )
+ return -1;
+ if ( fsm->cs >= range_fsm_first_final )
+ return 1;
+ return 0;
+}
+
+struct range fsm;
+
+void test( char *buf )
+{
+	/* Run one NUL-terminated input through the global machine, starting
+	 * from a fresh initial state, and print the verdict. */
+	int len = strlen( buf );
+	int verdict;
+
+	range_init( &fsm );
+	range_execute( &fsm, buf, len );
+	verdict = range_finish( &fsm );
+
+	if ( verdict <= 0 )
+		printf("FAIL\n");
+	else
+		printf("ACCEPT\n");
+}
+
+int main()
+{
+ test( "a\n" );
+ test( "z\n" );
+ test( "g\n" );
+ test( "no\n" );
+ test( "1\n" );
+
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+ACCEPT
+ACCEPT
+ACCEPT
+FAIL
+FAIL
+#endif
diff --git a/test/repetition.rl b/test/repetition.rl
new file mode 100644
index 0000000..23638b3
--- /dev/null
+++ b/test/repetition.rl
@@ -0,0 +1,293 @@
+/*
+ * @LANG: c++
+ */
+
+/* Test repetition operators. */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+using namespace std;
+
+struct Rep
+{
+ int cs;
+
+ int init( );
+ int execute( const char *data, int len );
+ int finish( );
+};
+
+%%{
+ machine Rep;
+
+ action begin { cout << "begin" << endl; }
+ action in { cout << "in" << endl; }
+ action end { cout << "end" << endl; }
+
+ a = 'a' >begin @in %end;
+ b = 'b' >begin @in %end;
+ c = 'c' >begin @in %end;
+ d = 'd' >begin @in %end;
+
+ main :=
+ ( a {5} '\n' )* '-\n'
+ ( b {,5} '\n' )* '-\n'
+ ( c {5,} '\n' )* '-\n'
+ ( d {2,5} '\n' )*;
+}%%
+
+%% write data;
+
+int Rep::init( )
+{
+ %% write init;
+ return 1;
+}
+
+int Rep::execute( const char *_data, int _len )
+{ // Runs the machine over _len bytes at _data; returns -1 for the error state, 1 for a final state, 0 otherwise.
+ const char *p = _data;
+ const char *pe = _data+_len; // one past the last byte to process
+
+ %% write exec;
+
+ if ( cs == Rep_error )
+ return -1;
+ if ( cs >= Rep_first_final )
+ return 1;
+ return 0;
+}
+
+int Rep::finish( )
+{ // Runs the generated eof code, then classifies cs the same way execute() does: -1 error, 1 final, 0 otherwise.
+ %% write eof;
+ if ( cs == Rep_error )
+ return -1;
+ if ( cs >= Rep_first_final )
+ return 1;
+ return 0;
+}
+
+/* Runs a fresh Rep machine over the NUL-terminated string buf and prints
+ * ACCEPT when it finishes in a final state, FAIL otherwise. The buffer is
+ * only read (Rep::execute takes a const char *), so it is taken as a
+ * const pointer; this lets main() pass string literals without relying on
+ * the string-literal-to-char* conversion that C++11 removed. */
+void test( const char *buf )
+{
+	Rep rep;
+	int len = strlen( buf );
+	rep.init();
+	rep.execute( buf, len );
+	if ( rep.finish() > 0 )
+		printf("ACCEPT\n");
+	else
+		printf("FAIL\n");
+}
+
+int main()
+{
+ test(
+ "aaaaa\n"
+ "-\n"
+ "\n"
+ "b\n"
+ "bb\n"
+ "bbb\n"
+ "bbbb\n"
+ "bbbbb\n"
+ "-\n"
+ "ccccc\n"
+ "ccccccc\n"
+ "cccccccccc\n"
+ "-\n"
+ "dd\n"
+ "ddd\n"
+ "dddd\n"
+ "ddddd\n"
+ );
+
+ test(
+ "a\n"
+ "-\n"
+ "b\n"
+ "-\n"
+ "c\n"
+ "-\n"
+ "d\n"
+ );
+
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+begin
+in
+end
+ACCEPT
+begin
+in
+FAIL
+#endif
diff --git a/test/rlscan.rl b/test/rlscan.rl
new file mode 100644
index 0000000..943c4f5
--- /dev/null
+++ b/test/rlscan.rl
@@ -0,0 +1,287 @@
+/*
+ * Lexes Ragel input files.
+ *
+ * @LANG: c++
+ * @ALLOW_GENFLAGS: -T0 -T1 -F0 -F1 -G0 -G1 -G2 -P
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+using namespace std;
+
+void escapeXML( char *data )
+{
+	/* Walk the NUL-terminated string and write it to cout, replacing
+	 * the XML metacharacters <, > and & with entity references. */
+	for ( char *cur = data; *cur != 0; cur += 1 ) {
+		const char ch = *cur;
+		if ( ch == '<' )
+			cout << "&lt;";
+		else if ( ch == '>' )
+			cout << "&gt;";
+		else if ( ch == '&' )
+			cout << "&amp;";
+		else
+			cout << ch;
+	}
+}
+
+void escapeXML( char c )
+{
+	/* Write a single character to cout, using the entity reference
+	 * when it is one of the XML metacharacters <, > or &. */
+	const char *entity = 0;
+	if ( c == '<' )
+		entity = "&lt;";
+	else if ( c == '>' )
+		entity = "&gt;";
+	else if ( c == '&' )
+		entity = "&amp;";
+
+	if ( entity != 0 )
+		cout << entity;
+	else
+		cout << c;
+}
+
+void escapeXML( char *data, int len )
+{
+	/* Write exactly len characters starting at data to cout, replacing
+	 * the XML metacharacters <, > and & with entity references. */
+	char *const stop = data + len;
+	while ( data != stop ) {
+		const char ch = *data;
+		if ( ch == '<' )
+			cout << "&lt;";
+		else if ( ch == '>' )
+			cout << "&gt;";
+		else if ( ch == '&' )
+			cout << "&amp;";
+		else
+			cout << ch;
+		data++;
+	}
+}
+
+inline void write( char *data )
+{
+ cout << data;
+}
+
+inline void write( char c )
+{
+ cout << c;
+}
+
+inline void write( char *data, int len )
+{
+ cout.write( data, len );
+}
+
+
+%%{
+ machine RagelScan;
+
+ word = [a-zA-Z_][a-zA-Z_0-9]*;
+ integer = [0-9]+;
+ hex = '0x' [0-9a-fA-F] [0-9a-fA-F]*;
+
+ default = ^0;
+ EOF = 0;
+
+ # Handles comments in outside code and inline blocks.
+ c_comment :=
+ ( default* :>> '*/' )
+ ${ escapeXML( fc ); }
+ @{ fret; };
+
+ action emit {
+ escapeXML( tokstart, tokend-tokstart );
+ }
+
+ #
+ # Inline action code
+ #
+
+ ilscan := |*
+
+ "'" ( [^'\\] | /\\./ )* "'" => emit;
+ '"' ( [^"\\] | /\\./ )* '"' => emit;
+ '/*' {
+ write( "/*" );
+ fcall c_comment;
+ };
+ '//' [^\n]* '\n' => emit;
+
+ '{' {
+ write( '{' );
+ inline_depth += 1;
+ };
+
+ '}' {
+ write( '}' );
+ /* If dropping down to the last } then return
+ * to ragel code. */
+ if ( --inline_depth == 0 ) {
+ write( "</inline>\n" );
+ fgoto rlscan;
+ }
+ };
+
+ default => { escapeXML( *tokstart ); };
+ *|;
+
+ #
+ # Ragel Tokens
+ #
+
+ rlscan := |*
+ '}%%' {
+ if ( !single_line ) {
+ write( "</section>\n" );
+ fgoto main;
+ }
+ };
+
+ '\n' {
+ if ( single_line ) {
+ write( "</section>\n" );
+ fgoto main;
+ }
+ };
+
+ # Word
+ word {
+ write( "<word>" );
+ write( tokstart, tokend-tokstart );
+ write( "</word>\n" );
+ };
+
+ # Decimal integer.
+ integer {
+ write( "<int>" );
+ write( tokstart, tokend-tokstart );
+ write( "</int>\n" );
+ };
+
+ # Hexadecimal integer.
+ hex {
+ write( "<hex>" );
+ write( tokstart, tokend-tokstart );
+ write( "</hex>\n" );
+ };
+
+ # Consume comments.
+ '#' [^\n]* '\n';
+
+ # Single literal string.
+ "'" ( [^'\\] | /\\./ )* "'" {
+ write( "<single_lit>" );
+ escapeXML( tokstart, tokend-tokstart );
+ write( "</single_lit>\n" );
+ };
+
+ # Double literal string.
+ '"' ( [^"\\] | /\\./ )* '"' {
+ write( "<double_lit>" );
+ escapeXML( tokstart, tokend-tokstart );
+ write( "</double_lit>\n" );
+ };
+
+ # Or literal.
+ '[' ( [^\]\\] | /\\./ )* ']' {
+ write( "<or_lit>" );
+ escapeXML( tokstart, tokend-tokstart );
+ write( "</or_lit>\n" );
+ };
+
+ # Regex Literal.
+ '/' ( [^/\\] | /\\./ ) * '/' {
+ write( "<re_lit>" );
+ escapeXML( tokstart, tokend-tokstart );
+ write( "</re_lit>\n" );
+ };
+
+ # Open an inline block
+ '{' {
+ inline_depth = 1;
+ write( "<inline>{" );
+ fgoto ilscan;
+ };
+
+ punct {
+ write( "<symbol>" );
+ escapeXML( fc );
+ write( "</symbol>\n" );
+ };
+
+ default;
+ *|;
+
+ #
+ # Outside code.
+ #
+
+ main := |*
+
+ "'" ( [^'\\] | /\\./ )* "'" => emit;
+ '"' ( [^"\\] | /\\./ )* '"' => emit;
+
+ '/*' {
+ escapeXML( tokstart, tokend-tokstart );
+ fcall c_comment;
+ };
+
+ '//' [^\n]* '\n' => emit;
+
+ '%%{' {
+ write( "<section>\n" );
+ single_line = false;
+ fgoto rlscan;
+ };
+
+ '%%' {
+ write( "<section>\n" );
+ single_line = true;
+ fgoto rlscan;
+ };
+
+ default {
+ escapeXML( *tokstart );
+ };
+
+ # EOF.
+ EOF;
+ *|;
+}%%
+
+%% write data nofinal;
+
+void test( char *data )
+{ // Scan the NUL-terminated string data with the RagelScan machine, writing the marked-up result to cout.
+ std::ios::sync_with_stdio(false);
+
+ int cs, act;
+ char *tokstart, *tokend;
+ int stack[1], top; // call stack for fcall/fret; the only fcall target in this file is c_comment
+
+ bool single_line = false; // true after "%%" (section ends at '\n'), false after "%%{" (section ends at "}%%")
+ int inline_depth = 0; // brace nesting depth while inside an inline code block
+
+ %% write init;
+
+ /* Read in a block. */
+ char *p = data;
+ char *pe = data + strlen( data );
+ %% write exec;
+
+ if ( cs == RagelScan_error ) {
+ /* Machine failed before finding a token. */
+ cerr << "PARSE ERROR" << endl;
+ exit(1);
+ }
+}
+
+#define BUFSIZE 2048
+
+int main()
+{
+ std::ios::sync_with_stdio(false);
+
+ test("hi %%{ /'}%%'/ { /*{*/ {} } + '\\'' }%%there\n");
+
+ return 0;
+}
+#ifdef _____OUTPUT_____
+hi <section>
+<re_lit>/'}%%'/</re_lit>
+<inline>{ /*{*/ {} }</inline>
+<symbol>+</symbol>
+<single_lit>'\''</single_lit>
+</section>
+there
+#endif
diff --git a/test/runtests b/test/runtests
new file mode 100755
index 0000000..32b5fb0
--- /dev/null
+++ b/test/runtests
@@ -0,0 +1,251 @@
+#!/bin/bash
+
+#
+# Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca>
+#
+
+# This file is part of Ragel.
+#
+# Ragel is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Ragel is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ragel; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+while getopts "gcnmleT:F:G:P:CDJ" opt; do
+ case $opt in
+ T|F|G|P)
+ genflags="$genflags -$opt$OPTARG"
+ options="$options -$opt$OPTARG"
+ ;;
+ n|m|l|e)
+ minflags="$minflags -$opt"
+ options="$options -$opt"
+ ;;
+ c)
+ compile_only="true"
+ options="$options -$opt"
+ ;;
+ g)
+ allow_generated="true"
+ ;;
+ C|D|J)
+ langflags="$langflags -$opt"
+ ;;
+ esac
+done
+
+[ -z "$minflags" ] && minflags="-n -m -l -e"
+[ -z "$genflags" ] && genflags="-T0 -T1 -F0 -F1 -G0 -G1 -G2"
+[ -z "$langflags" ] && langflags="-C -D -J"
+
+shift $((OPTIND - 1));
+
+[ -z "$*" ] && set -- *.rl
+
+# find the config file
+config=../common/config.h
+ragel=../ragel/ragel
+rlcodegen=../rlcodegen/rlcodegen
+if ! [ -d ../common ]; then
+ config=../$config
+ ragel=../$ragel
+ rlcodegen=../$rlcodegen
+fi
+
+cxx_compiler=`sed '/^#define CXX/s/#define CXX *//p;d' $config`
+c_compiler=`sed '/^#define CC/s/#define CC *//p;d' $config`
+objc_compiler=`sed '/^#define GOBJC/s/#define GOBJC *//p;d' $config`
+d_compiler=`sed '/^#define GDC/s/#define GDC *//p;d' $config`
+java_compiler=`sed '/^#define JAVAC/s/#define JAVAC *//p;d' $config` # anchored at line start like the sibling probes
+txl_engine=`sed '/^#define TXL/s/#define TXL *//p;d' $config`
+
+function test_error
+{
+ exit 1;
+}
+
+for test_case; do
+ root=${test_case%.rl};
+
+ if ! [ -f "$test_case" ]; then
+ echo "runtests: not a file: $test_case" >&2 # redirect belongs to the echo: error goes to stderr
+ exit 1;
+ fi
+
+ # Check if we should ignore the test case
+ ignore=`sed '/@IGNORE:/s/^.*: *//p;d' $test_case`
+ if [ "$ignore" = yes ]; then
+ continue;
+ fi
+
+ # Skip generated test cases unless the -g (allow generated) flag was given.
+ is_generated=`sed '/@GENERATED:/s/^.*: *//p;d' $test_case`
+ if [ "$is_generated" = yes ] && [ "$allow_generated" != true ]; then
+ continue;
+ fi
+
+ expected_out=$root.exp;
+ sed '1,/_____OUTPUT_____/d;$d' $test_case > $expected_out
+
+ lang=`sed '/@LANG:/s/^.*: *//p;d' $test_case`
+ if [ -z "$lang" ]; then
+ echo "$test_case: language unset" >&2 # redirect belongs to the echo: error goes to stderr
+ exit 1;
+ fi
+
+ case $lang in
+ c++)
+ code_suffix=cpp;
+ compiler=$cxx_compiler;
+ lang_opt=-C;
+ cflags="-pedantic -ansi -Wall -O3"
+ ;;
+ d)
+ code_suffix=d;
+ compiler=$d_compiler;
+ lang_opt=-D;
+ cflags="-Wall -O3"
+ ;;
+ c)
+ code_suffix=c;
+ compiler=$c_compiler;
+ lang_opt=-C;
+ cflags="-pedantic -ansi -Wall -O3"
+ ;;
+ obj-c)
+ code_suffix=m;
+ compiler=$objc_compiler
+ lang_opt=-C;
+ cflags="-Wall -O3 -fno-strict-aliasing -lobjc"
+ ;;
+ java)
+ code_suffix=java;
+ compiler=$java_compiler
+ lang_opt=-J;
+ cflags=""
+ ;;
+ indep)
+ # If no TXL engine is configured we cannot translate the test case, so skip it.
+ [ -z "$txl_engine" ] && continue
+ for lang in c d java; do
+ case $lang in
+ c) lf="-C";;
+ d) lf="-D";;
+ java) lf="-J";;
+ esac
+
+ echo "$langflags" | grep -e $lf >/dev/null || continue
+
+ targ=${root}_$lang.rl
+ echo "./langtrans_$lang.sh $test_case > $targ"
+ if ! ./langtrans_$lang.sh $test_case > $targ; then
+ test_error
+ fi
+ echo "./runtests -g $options $targ"
+ if ! ./runtests -g $options $targ; then
+ test_error
+ fi
+ done
+ continue;
+ ;;
+ *)
+ echo "$test_case: unknown language type $lang" >&2
+ exit 1;
+ ;;
+ esac
+
+ # Make sure that we are interested in the host language.
+ echo "$langflags" | grep -e $lang_opt >/dev/null || continue
+
+ code_src=$root.$code_suffix;
+ binary=$root.bin;
+ output=$root.out;
+
+ # If we have no compiler for the source program then skip it.
+ [ -z "$compiler" ] && continue
+
+ additional_cflags=`sed '/@CFLAGS:/s/^.*: *//p;d' $test_case`
+ [ -n "$additional_cflags" ] && cflags="$cflags $additional_cflags"
+
+ allow_minflags=`sed '/@ALLOW_MINFLAGS:/s/^.*: *//p;d' $test_case`
+ [ -z "$allow_minflags" ] && allow_minflags="-n -m -l -e"
+
+ allow_genflags=`sed '/@ALLOW_GENFLAGS:/s/^.*: *//p;d' $test_case`
+ [ -z "$allow_genflags" ] && allow_genflags="-T0 -T1 -F0 -F1 -G0 -G1 -G2"
+
+ for min_opt in $minflags; do
+ for gen_opt in $genflags; do
+ echo "$allow_minflags" | grep -e $min_opt >/dev/null || continue
+
+ grep_gen_opt=${gen_opt}
+ split_iters=${gen_opt#-P}
+ if test $split_iters != $gen_opt; then
+ grep_gen_opt="-P";
+ fi
+ echo "$allow_genflags" | grep -e $grep_gen_opt >/dev/null || continue
+
+ echo "$ragel $min_opt $lang_opt $test_case | $rlcodegen $gen_opt -o $code_src"
+ if ! $ragel $min_opt $lang_opt $test_case | $rlcodegen $gen_opt -o $code_src; then
+ test_error;
+ fi
+
+ split_objs=""
+ if test $split_iters != $gen_opt; then
+ n=0;
+ while test $n -lt $split_iters; do
+ part_root=${root}_`awk 'BEGIN {
+ width = 0;
+ high = '$split_iters' - 1;
+ while ( high > 0 ) {
+ width = width + 1;
+ high = int(high / 10);
+ }
+ suffFormat = "%" width "." width "d\n";
+ printf( suffFormat, '$n' );
+ exit 0;
+ }'`
+ part_src=${part_root}.c
+ part_bin=${part_root}.o
+ echo "$compiler -c $cflags -o $part_bin $part_src"
+ if ! $compiler -c $cflags -o $part_bin $part_src; then
+ test_error;
+ fi
+ split_objs="$split_objs $part_bin"
+ n=$((n+1))
+ done
+ fi
+
+ out_args=""
+ [ $lang != java ] && out_args="-o ${binary}";
+
+ echo "$compiler ${cflags} ${out_args} ${code_src}"
+ if ! $compiler ${cflags} ${out_args} ${code_src}; then
+ test_error;
+ fi
+
+ if [ "$compile_only" != "true" ]; then
+ echo -n "running $root ... ";
+
+ exec_cmd=./$binary
+ [ $lang = java ] && exec_cmd="java $root"
+
+ $exec_cmd 2>&1 > $output;
+ if diff $expected_out $output > /dev/null; then
+ echo "passed";
+ else
+ echo "FAILED";
+ test_error;
+ fi;
+ fi
+ done
+ done
+done
diff --git a/test/stateact1.rl b/test/stateact1.rl
new file mode 100644
index 0000000..ef50c75
--- /dev/null
+++ b/test/stateact1.rl
@@ -0,0 +1,48 @@
+/*
+ * @LANG: indep
+ *
+ * Test in and out state actions.
+ */
+%%
+%%{
+ machine state_act;
+
+ action a1 { prints "a1\n"; }
+ action a2 { prints "a2\n"; }
+ action b1 { prints "b1\n"; }
+ action b2 { prints "b2\n"; }
+ action c1 { prints "c1\n"; }
+ action c2 { prints "c2\n"; }
+ action next_again {fnext again;}
+
+ hi = 'hi';
+ line = again:
+ hi
+ >to b1
+ >from b2
+ '\n'
+ >to c1
+ >from c2
+ @next_again;
+
+ main := line*
+ >to a1
+ >from a2;
+}%%
+
+/* _____INPUT_____
+"hi\nhi\n"
+_____INPUT_____ */
+
+/* _____OUTPUT_____
+a2
+b2
+c1
+c2
+b1
+b2
+c1
+c2
+b1
+FAIL
+_____OUTPUT_____ */
diff --git a/test/statechart1.rl b/test/statechart1.rl
new file mode 100644
index 0000000..9f1ce49
--- /dev/null
+++ b/test/statechart1.rl
@@ -0,0 +1,102 @@
+/*
+ * @LANG: c
+ */
+
+/*
+ * Test state charts: labelled states joined by -> transitions.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+struct state_chart
+{
+ int cs;
+};
+
+%%{
+ machine state_chart;
+ variable curstate fsm->cs;
+
+ action a { printf("a"); }
+ action b { printf("b"); }
+ action hexa { printf("a"); }
+ action hexb { printf("b"); }
+
+ hex_a = '0x' '0'* '61' @hexa;
+ hex_b = '0x' '0'* '62' @hexb;
+
+ a = 'a' @a | hex_a;
+ b = 'b' @b | hex_b;
+ ws = ' '+;
+
+ mach =
+ start: (
+ a -> st1 |
+ b -> st2 |
+ zlen -> final
+ ),
+ st1: (
+ a -> st1 |
+ ws -> start |
+ zlen -> final
+ ),
+ st2: (
+ b -> st2 |
+ ws -> start |
+ zlen -> final
+ );
+
+ main := ( mach '\n' )*;
+}%%
+
+%% write data;
+
+void state_chart_init( struct state_chart *fsm )
+{
+ %% write init;
+}
+
+void state_chart_execute( struct state_chart *fsm, const char *_data, int _len )
+{
+ const char *p = _data;
+ const char *pe = _data+_len;
+
+ %% write exec;
+}
+
+int state_chart_finish( struct state_chart *fsm )
+{
+ %% write eof;
+
+ if ( fsm->cs == state_chart_error )
+ return -1;
+ if ( fsm->cs >= state_chart_first_final )
+ return 1;
+ return 0;
+}
+
+struct state_chart sc;
+
+void test( char *buf )
+{
+ int len = strlen( buf );
+ state_chart_init( &sc );
+ state_chart_execute( &sc, buf, len );
+ state_chart_finish( &sc );
+ printf("\n");
+}
+
+int main()
+{
+ test(
+ "aa0x0061aa b\n"
+ "bbb0x62b 0x61 0x000062\n"
+ );
+
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+aaaaabbbbbbab
+#endif
diff --git a/test/strings1.rl b/test/strings1.rl
new file mode 100644
index 0000000..d156da1
--- /dev/null
+++ b/test/strings1.rl
@@ -0,0 +1,195 @@
+/*
+ * @LANG: c
+ */
+
+#include <string.h>
+#include <stdio.h>
+
+struct strs
+{
+ int cs;
+};
+
+%%{
+ machine strs;
+ variable curstate fsm->cs;
+
+ main :=
+ "__gmon_start__\n" |
+ "cerr\n" |
+ "__cp_push_exception\n" |
+ "_DYNAMIC\n" |
+ "__rtti_user\n" |
+ "__rtti_si\n" |
+ "_init\n" |
+ "__throw\n" |
+ "__deregister_frame_info\n" |
+ "terminate__Fv\n" |
+ "__builtin_vec_new\n" |
+ "_fini\n" |
+ "__builtin_vec_delete\n" |
+ "_GLOBAL_OFFSET_TABLE_\n" |
+ "__nw__FUiPv\n" |
+ "__builtin_delete\n" |
+ "__builtin_new\n" |
+ "cout\n" |
+ "__register_frame_info\n" |
+ "__eh_alloc\n" |
+ "strcpy\n" |
+ "stdout\n" |
+ "memmove\n" |
+ "memcpy\n" |
+ "malloc\n" |
+ "isatty\n" |
+ "strtoul\n" |
+ "fprintf\n" |
+ "stdin\n" |
+ "ferror\n" |
+ "strncpy\n" |
+ "unlink\n" |
+ "strcasecmp\n" |
+ "realloc\n" |
+ "_IO_getc\n" |
+ "fread\n" |
+ "memset\n" |
+ "__assert_fail\n" |
+ "strcmp\n" |
+ "stderr\n" |
+ "fwrite\n" |
+ "exit\n" |
+ "fopen\n" |
+ "atoi\n" |
+ "fileno\n" |
+ "_IO_stdin_used\n" |
+ "__libc_start_main\n" |
+ "strlen\n" |
+ "free\n" |
+ "_edata\n" |
+ "__bss_start\n" |
+ "_end\n" |
+ "QVhl\n" |
+ "BPPh\n" |
+ "PHRV\n" |
+ "PHRj\n" |
+ "PHRj\n" |
+ "jphy\n" |
+ "jqhy\n" |
+ "PHRj\n" |
+ "PHRj\n" |
+ "LWVS\n" |
+ "LWVS\n" |
+ "bad_alloc\n" |
+ "main\n" |
+ "false\n" |
+ "help\n" |
+ "bad_alloc\n" |
+ "bad_alloc\n" |
+ "bad_alloc\n" |
+ "ascii\n" |
+ "extend\n" |
+ "alnum\n" |
+ "alpha\n" |
+ "cntrl\n" |
+ "digit\n" |
+ "graph\n" |
+ "lower\n" |
+ "print\n" |
+ "punct\n" |
+ "space\n" |
+ "upper\n" |
+ "xdigit\n" |
+ "false\n" |
+ "bad_alloc\n" |
+ "bad_alloc\n" |
+ "bad_alloc\n" |
+ "TransStruct\n" |
+ "StateStruct\n" |
+ "Struct\n" |
+ "Init\n" |
+ "bad_alloc\n" |
+ "TransStruct\n" |
+ "StateStruct\n" |
+ "Struct\n" |
+ "Init\n" |
+ "Accept\n" |
+ "Finish\n" |
+ "bad_alloc\n" |
+ "Struct\n" |
+ "Init\n" |
+ "Finish\n" |
+ "Accept\n" |
+ "bad_alloc\n" |
+ "Struct\n" |
+ "Init\n" |
+ "bad_alloc\n" |
+ "Struct\n" |
+ "Init\n" |
+ "Finish\n" |
+ "Accept\n" |
+ "bad_alloc\n" |
+ "Struct\n" |
+ "Init\n" |
+ "Finish\n" |
+ "Accept";
+}%%
+
+%% write data;
+
+void strs_init( struct strs *fsm )
+{
+ %% write init;
+}
+
+void strs_execute( struct strs *fsm, const char *_data, int _len )
+{
+ const char *p = _data;
+ const char *pe = _data+_len;
+
+ %% write exec;
+}
+
+int strs_finish( struct strs *fsm )
+{
+ %% write eof;
+
+ if ( fsm->cs == strs_error )
+ return -1;
+ if ( fsm->cs >= strs_first_final )
+ return 1;
+ return 0;
+}
+
+struct strs fsm;
+void test( char *buf )
+{
+ int len = strlen( buf );
+ strs_init( &fsm );
+ strs_execute( &fsm, buf, len );
+ if ( strs_finish( &fsm ) > 0 )
+ printf("ACCEPT\n");
+ else
+ printf("FAIL\n");
+}
+
+
+int main()
+{
+ test( "stdin\n" );
+ test( "bad_alloc\n" );
+ test( "_GLOBAL_OFFSET_TABLE_\n" );
+ test( "not in\n" );
+ test(
+ "isatty\n"
+ "junk on end.\n"
+ );
+
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+ACCEPT
+ACCEPT
+ACCEPT
+FAIL
+FAIL
+#endif
diff --git a/test/strings2.h b/test/strings2.h
new file mode 100644
index 0000000..1cf0ce9
--- /dev/null
+++ b/test/strings2.h
@@ -0,0 +1,9 @@
+#ifndef STRINGS2_H
+#define STRINGS2_H
+
+struct strs
+{
+ int cs; /* current state of the strs machine (strings2.rl accesses it as fsm->cs) */
+};
+
+#endif /* STRINGS2_H */
diff --git a/test/strings2.rl b/test/strings2.rl
new file mode 100644
index 0000000..ad4dd40
--- /dev/null
+++ b/test/strings2.rl
@@ -0,0 +1,1349 @@
+/*
+ * @LANG: c
+ * @ALLOW_GENFLAGS: -T0 -T1 -F0 -F1 -P
+ * @ALLOW_MINFLAGS: -n -m -l
+ */
+
+#include <string.h>
+#include <stdio.h>
+
+#include "strings2.h"
+
+%%{
+ machine strs;
+ variable curstate fsm->cs;
+
+ main :=
+ "/lib/ld-linux.so.2\n" |
+ "libstdc++-libc6.2-2.so.3\n" |
+ "cerr\n" |
+ "__cp_push_exception\n" |
+ "_DYNAMIC\n" |
+ "endl__FR7ostream\n" |
+ "__ls__7ostreamc\n" |
+ "_._9exception\n" |
+ "__vt_9bad_alloc\n" |
+ "__rtti_user\n" |
+ "__ls__7ostreamPFR7ostream_R7ostream\n" |
+ "__rtti_si\n" |
+ "_init\n" |
+ "bad__C3ios\n" |
+ "__throw\n" |
+ "__ls__7ostreamPCc\n" |
+ "__deregister_frame_info\n" |
+ "terminate__Fv\n" |
+ "__ls__7ostreamb\n" |
+ "__ls__7ostreami\n" |
+ "__8ofstreamiPCcii\n" |
+ "__builtin_vec_new\n" |
+ "_fini\n" |
+ "__9exception\n" |
+ "__builtin_vec_delete\n" |
+ "_GLOBAL_OFFSET_TABLE_\n" |
+ "__vt_9exception\n" |
+ "__nw__FUiPv\n" |
+ "_._9bad_alloc\n" |
+ "__builtin_delete\n" |
+ "__builtin_new\n" |
+ "cout\n" |
+ "__register_frame_info\n" |
+ "__eh_alloc\n" |
+ "__gmon_start__\n" |
+ "libm.so.6\n" |
+ "libc.so.6\n" |
+ "strcpy\n" |
+ "stdout\n" |
+ "memmove\n" |
+ "memcpy\n" |
+ "malloc\n" |
+ "strtoul\n" |
+ "fprintf\n" |
+ "stdin\n" |
+ "ferror\n" |
+ "strncpy\n" |
+ "strcasecmp\n" |
+ "realloc\n" |
+ "_IO_getc\n" |
+ "fread\n" |
+ "memset\n" |
+ "clearerr\n" |
+ "__assert_fail\n" |
+ "strcmp\n" |
+ "stderr\n" |
+ "fwrite\n" |
+ "__errno_location\n" |
+ "exit\n" |
+ "fopen\n" |
+ "atoi\n" |
+ "_IO_stdin_used\n" |
+ "__libc_start_main\n" |
+ "strlen\n" |
+ "free\n" |
+ "_edata\n" |
+ "__bss_start\n" |
+ "_end\n" |
+ "GLIBC_2.1\n" |
+ "GLIBC_2.0\n" |
+ "PTRh\n" |
+ "QVhL\n" |
+ "<WVS\n" |
+ "LWVS\n" |
+ "PHRW\n" |
+ "<WVS\n" |
+ "\WVS\n" |
+ ",WVS\n" |
+ "@Phl\n" |
+ "<WVS\n" |
+ "jZjA\n" |
+ "jzja\n" |
+ "j9j0\n" |
+ "j9j0\n" |
+ "jZjA\n" |
+ "jzja\n" |
+ "jzja\n" |
+ "jZjA\n" |
+ "j~j!\n" |
+ "j~j \n" |
+ "j/j!\n" |
+ "j@j:\n" |
+ "j`j[\n" |
+ "j~j{\n" |
+ "j9j0\n" |
+ "jFjA\n" |
+ "jfja\n" |
+ ",WVS\n" |
+ ",WVS\n" |
+ ";C<|\n" |
+ "<WVS\n" |
+ "C ;C\n" |
+ "C$;C\n" |
+ "C$;C\n" |
+ "C ;C\n" |
+ ",WVS\n" |
+ ";E uF\n" |
+ "P ;U\n" |
+ "P ;U\n" |
+ "E$fP\n" |
+ "E$fP\n" |
+ "E$fP\n" |
+ "E$fP\n" |
+ "E$fP\n" |
+ "E$fP\n" |
+ "E$fP\n" |
+ "E$fP\n" |
+ "u!h@\n" |
+ "PHRj\n" |
+ "PHRj\n" |
+ "P\ U\n" |
+ "j]hY\n" |
+ "johY\n" |
+ "PHRj\n" |
+ "PHRj\n" |
+ "E fPj\n" |
+ "E fP\n" |
+ "E fP\n" |
+ "E fP\n" |
+ "E fP\n" |
+ "E fP\n" |
+ "E fPj\n" |
+ "t$h`\n" |
+ "F ;C } \n" |
+ "F ;C ~ \n" |
+ "@X:BXt)\n" |
+ "\WVS\n" |
+ "\WVS\n" |
+ "PPRS\n" |
+ "F ;C } \n" |
+ "F ;C ~ \n" |
+ "@X:BXt)\n" |
+ ";H(}:\n" |
+ "@ fP\n" |
+ ";P |\n" |
+ "<WVS\n" |
+ ";P |\n" |
+ "bad_alloc\n" |
+ "usage: ragel [options] file\n" |
+ "general:\n" |
+ " -h, -H, -? Disply this usage.\n" |
+ " -o <file> Write output to <file>.\n" |
+ " -s Print stats on the compiled fsm.\n" |
+ " -f Dump the final fsm.\n" |
+ "fsm minimization:\n" |
+ " -n No minimization (default).\n" |
+ " -m Find the minimal fsm accepting the language.\n" |
+ "generated code language:\n" |
+ " -c Generate c code (default).\n" |
+ " -C Generate c++ code.\n" |
+ "generated code style:\n" |
+ " -T0 Generate a table driven fsm (default).\n" |
+ " -T1 Generate a faster table driven fsm.\n" |
+ " -S0 Generate a switch driven fsm.\n" |
+ " -G0 Generate a goto driven fsm.\n" |
+ " -G1 Generate a faster goto driven fsm.\n" |
+ " -G2 Generate a really fast goto driven fsm.\n" |
+ "char * FileNameFromStem(char *, char *)\n" |
+ "main.cpp\n" |
+ "len > 0\n" |
+ "main\n" |
+ "ragel: main graph not defined\n" |
+ "graph states: \n" |
+ "graph transitions: \n" |
+ "machine states: \n" |
+ "machine functions: \n" |
+ "function array: \n" |
+ "T:S:G:Cco:senmabjkfhH?-:\n" |
+ "ragel: zero length output file name given\n" |
+ "ragel: output file already given\n" |
+ "ragel: invalid param specified (try -h for a list of options)\n" |
+ "help\n" |
+ "ragel: zero length input file name given\n" |
+ "ragel: input file already given\n" |
+ "ragel: warning: -e given but minimization is not enabled\n" |
+ "ragel: no input file (try -h for a list of options)\n" |
+ " for reading\n" |
+ "ragel: could not open \n" |
+ " for writing\n" |
+ "ragel: error opening \n" |
+ " * Parts of this file are copied from Ragel source covered by the GNU\n" |
+ " * GPL. As a special exception, you may use the parts of this file copied\n" |
+ " * from Ragel source without restriction. The remainder is derived from\n" |
+ "bad_alloc\n" |
+ "%s:%i: unterminated literal\n" |
+ "%s:%i: unterminated comment\n" |
+ "%s:%i: bad character in literal\n" |
+ "fatal flex scanner internal error--no action found\n" |
+ "fatal flex scanner internal error--end of buffer missed\n" |
+ "fatal error - scanner input buffer overflow\n" |
+ "input in flex scanner failed\n" |
+ "out of dynamic memory in yy_create_buffer()\n" |
+ "out of dynamic memory in yy_scan_buffer()\n" |
+ "out of dynamic memory in yy_scan_bytes()\n" |
+ "bad buffer in yy_scan_bytes()\n" |
+ "bad_alloc\n" |
+ "%s:%i: warning: range gives null fsm\n" |
+ "%s:%i: warning: literal used in range is not of length 1, using 0x%x\n" |
+ "%s:%i: warning: overflow in byte constant\n" |
+ "parse error\n" |
+ "parser stack overflow\n" |
+ "%s:%i: %s\n" |
+ "bad_alloc\n" |
+ "extend\n" |
+ "ascii\n" |
+ "alpha\n" |
+ "digit\n" |
+ "alnum\n" |
+ "lower\n" |
+ "upper\n" |
+ "cntrl\n" |
+ "graph\n" |
+ "print\n" |
+ "punct\n" |
+ "space\n" |
+ "xdigit\n" |
+ "struct Fsm * FactorWithAugNode::Walk()\n" |
+ "parsetree.cpp\n" |
+ "false\n" |
+ "bad_alloc\n" |
+ "xx []()\n" |
+ " df \n" |
+ "StartState: \n" |
+ "Final States:\n" |
+ "void FsmGraph<State,int,Trans>::AttachStates(State *, State *, Trans *, FsmKeyType, int)\n" |
+ "rlfsm/fsmattach.cpp\n" |
+ "trans->toState == __null\n" |
+ "trans->fromState == __null\n" |
+ "void FsmGraph<State,int,Trans>::DetachStates(State *, State *, Trans *, FsmKeyType, int)\n" |
+ "trans->toState == to\n" |
+ "trans->fromState == from\n" |
+ "inTel != __null\n" |
+ "void Vector<BstMapEl<int,int>,ResizeExpn>::setAs(const Vector<BstMapEl<int,int>,ResizeExpn> &)\n" |
+ "aapl/vectcommon.h\n" |
+ "&v != this\n" |
+ "void FsmGraph<State,int,Trans>::ChangeRangeLowerKey(Trans *, int, int)\n" |
+ "inRangeEl != __null\n" |
+ "void FsmGraph<State,int,Trans>::IsolateStartState()\n" |
+ "rlfsm/fsmgraph.cpp\n" |
+ "md.stateDict.nodeCount == 0\n" |
+ "md.stfil.listLength == 0\n" |
+ "struct State * FsmGraph<State,int,Trans>::DetachState(State *)\n" |
+ "fromTel != __null\n" |
+ "struct Trans * FsmGraph<State,int,Trans>::AttachStates(State *, State *, FsmKeyType, int, int)\n" |
+ "outTel != __null\n" |
+ "outTel1 != __null\n" |
+ "from->defOutTrans == __null\n" |
+ "void FsmGraph<State,int,Trans>::VerifyOutFuncs()\n" |
+ "state->outTransFuncTable.tableLength == 0\n" |
+ "!state->isOutPriorSet\n" |
+ "state->outPriority == 0\n" |
+ "void FsmGraph<State,int,Trans>::VerifyIntegrity()\n" |
+ "rlfsm/fsmbase.cpp\n" |
+ "outIt.trans->fromState == state\n" |
+ "inIt.trans->toState == state\n" |
+ "static int FsmTrans<State,Trans,int,CmpOrd<int> >::ComparePartPtr(FsmTrans<State,Trans,int,CmpOrd<int> > *, FsmTrans<State,Trans,int,CmpOrd<int> > *)\n" |
+ "rlfsm/fsmstate.cpp\n" |
+ "false\n" |
+ "void FsmGraph<State,int,Trans>::InTransMove(State *, State *)\n" |
+ "dest != src\n" |
+ "static bool FsmTrans<State,Trans,int,CmpOrd<int> >::ShouldMarkPtr(MarkIndex<State> &, FsmTrans<State,Trans,int,CmpOrd<int> > *, FsmTrans<State,Trans,int,CmpOrd<int> > *)\n" |
+ "bad_alloc\n" |
+ "10FsmCodeGen\n" |
+ "bad_alloc\n" |
+ " case \n" |
+ "break;}\n" |
+ "unsigned char\n" |
+ "unsigned short\n" |
+ "unsigned int\n" |
+ "{0, \n" |
+ "/* Forward dec state for the transition structure. */\n" |
+ "struct \n" |
+ "StateStruct;\n" |
+ "/* A single transition. */\n" |
+ "struct \n" |
+ "TransStruct\n" |
+ " struct \n" |
+ "StateStruct *toState;\n" |
+ " int *funcs;\n" |
+ "typedef struct \n" |
+ "TransStruct \n" |
+ "Trans;\n" |
+ "/* A single state. */\n" |
+ "struct \n" |
+ "StateStruct\n" |
+ " int lowIndex;\n" |
+ " int highIndex;\n" |
+ " void *transIndex;\n" |
+ " unsigned int dflIndex;\n" |
+ " int *outFuncs;\n" |
+ " int isFinState;\n" |
+ "typedef struct \n" |
+ "StateStruct \n" |
+ "State;\n" |
+ "/* Only non-static data: current state. */\n" |
+ "struct \n" |
+ "Struct\n" |
+ "State *curState;\n" |
+ " int accept;\n" |
+ "typedef struct \n" |
+ "Struct \n" |
+ "/* Init the fsm. */\n" |
+ "void \n" |
+ "Init( \n" |
+ " *fsm );\n" |
+ "/* Execute some chunk of data. */\n" |
+ "void \n" |
+ "Execute( \n" |
+ " *fsm, char *data, int dlen );\n" |
+ "/* Indicate to the fsm tha there is no more data. */\n" |
+ "void \n" |
+ "Finish( \n" |
+ " *fsm );\n" |
+ "/* Did the machine accept? */\n" |
+ "int \n" |
+ "Accept( \n" |
+ " *fsm );\n" |
+ "#define f \n" |
+ "#define s \n" |
+ "#define i \n" |
+ "#define t \n" |
+ "/* The array of functions. */\n" |
+ "#if \n" |
+ "static int \n" |
+ "_f[] = {\n" |
+ "#endif\n" |
+ "/* The array of indicies into the transition array. */\n" |
+ "#if \n" |
+ "static \n" |
+ "_i[] = {\n" |
+ "#endif\n" |
+ "/* The aray of states. */\n" |
+ "static \n" |
+ "State \n" |
+ "_s[] = {\n" |
+ "/* The array of transitions. */\n" |
+ "static \n" |
+ "Trans \n" |
+ "_t[] = {\n" |
+ "/* The start state. */\n" |
+ "static \n" |
+ "State *\n" |
+ "_startState = s+\n" |
+ "#undef f\n" |
+ "#undef s\n" |
+ "#undef i\n" |
+ "#undef t\n" |
+ "* Execute functions pointed to by funcs until the null function is found. \n" |
+ "inline static void \n" |
+ "ExecFuncs( \n" |
+ " *fsm, int *funcs, char *p )\n" |
+ " int len = *funcs++;\n" |
+ " while ( len-- > 0 ) {\n" |
+ " switch ( *funcs++ ) {\n" |
+ " * Init the fsm to a runnable state.\n" |
+ "void \n" |
+ " *fsm )\n" |
+ " fsm->curState = \n" |
+ "_startState;\n" |
+ " fsm->accept = 0;\n" |
+ " * Did the fsm accept? \n" |
+ "int \n" |
+ " *fsm )\n" |
+ " return fsm->accept;\n" |
+ " * Execute the fsm on some chunk of data. \n" |
+ "void \n" |
+ " *fsm, char *data, int dlen )\n" |
+ " char *p = data;\n" |
+ " int len = dlen;\n" |
+ "State *cs = fsm->curState;\n" |
+ " for ( ; len > 0; p++, len-- ) {\n" |
+ " int c = (unsigned char) *p;\n" |
+ "Trans *trans;\n" |
+ " if ( cs == 0 )\n" |
+ " goto finished;\n" |
+ " /* If the character is within the index bounds then get the\n" |
+ " * transition for it. If it is out of the transition bounds\n" |
+ " * we will use the default transition. */\n" |
+ " if ( cs->lowIndex <= c && c < cs->highIndex ) {\n" |
+ " /* Use the index to look into the transition array. */\n" |
+ " trans = \n" |
+ "_t + \n" |
+ " ((\n" |
+ "*)cs->transIndex)[c - cs->lowIndex];\n" |
+ " else {\n" |
+ " /* Use the default index as the char is out of range. */\n" |
+ " trans = \n" |
+ "_t + cs->dflIndex;\n" |
+ " /* If there are functions for this transition then execute them. */\n" |
+ " if ( trans->funcs != 0 )\n" |
+ "ExecFuncs( fsm, trans->funcs, p );\n" |
+ " /* Move to the new state. */\n" |
+ " cs = trans->toState;\n" |
+ "finished:\n" |
+ " fsm->curState = cs;\n" |
+ " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" |
+ "void \n" |
+ " *fsm )\n" |
+ "State *cs = fsm->curState;\n" |
+ " if ( cs != 0 && cs->isFinState ) {\n" |
+ " /* If finishing in a final state then execute the\n" |
+ " * out functions for it. (if any). */\n" |
+ " if ( cs->outFuncs != 0 )\n" |
+ "ExecFuncs( fsm, cs->outFuncs, 0 );\n" |
+ " fsm->accept = 1;\n" |
+ " else {\n" |
+ " /* If we are not in a final state then this\n" |
+ " * is an error. Move to the error state. */\n" |
+ " fsm->curState = 0;\n" |
+ "class \n" |
+ "public:\n" |
+ " /* Forward dec state for the transition structure. */\n" |
+ " struct State;\n" |
+ " /* A single transition. */\n" |
+ " struct Trans\n" |
+ " State *toState;\n" |
+ " int *funcs;\n" |
+ " /* A single state. */\n" |
+ " struct State\n" |
+ " int lowIndex;\n" |
+ " int highIndex;\n" |
+ " void *transIndex;\n" |
+ " unsigned int dflIndex;\n" |
+ " int *outFuncs;\n" |
+ " int isFinState;\n" |
+ " /* Constructor. */\n" |
+ " void Init( );\n" |
+ " /* Execute some chunk of data. */\n" |
+ " void Execute( char *data, int dlen );\n" |
+ " /* Indicate to the fsm tha there is no more data. */\n" |
+ " void Finish( );\n" |
+ " /* Did the machine accept? */\n" |
+ " int Accept( );\n" |
+ " State *curState;\n" |
+ " int accept;\n" |
+ " inline void ExecFuncs( int *funcs, char *p );\n" |
+ "/* The array of functions. */\n" |
+ "#if \n" |
+ "::State \n" |
+ "/* The array of trainsitions. */\n" |
+ "static \n" |
+ "::Trans \n" |
+ "/* The start state. */\n" |
+ "static \n" |
+ "::State *\n" |
+ " * Execute functions pointed to by funcs until the null function is found. \n" |
+ "inline void \n" |
+ "::ExecFuncs( int *funcs, char *p )\n" |
+ " int len = *funcs++;\n" |
+ " while ( len-- > 0 ) {\n" |
+ " switch ( *funcs++ ) {\n" |
+ " * Constructor\n" |
+ " Init();\n" |
+ "Init\n" |
+ "void \n" |
+ "::Init( )\n" |
+ " curState = \n" |
+ "_startState;\n" |
+ " accept = 0;\n" |
+ "::Accept( )\n" |
+ " return accept;\n" |
+ "::Execute( char *data, int dlen )\n" |
+ " char *p = data;\n" |
+ " int len = dlen;\n" |
+ " State *cs = curState;\n" |
+ " for ( ; len > 0; p++, len-- ) {\n" |
+ " int c = (unsigned char)*p;\n" |
+ " Trans *trans;\n" |
+ " if ( cs == 0 )\n" |
+ " goto finished;\n" |
+ " /* If the character is within the index bounds then get the\n" |
+ " * transition for it. If it is out of the transition bounds\n" |
+ " * we will use the default transition. */\n" |
+ " if ( cs->lowIndex <= c && c < cs->highIndex ) {\n" |
+ " /* Use the index to look into the transition array. */\n" |
+ " trans = \n" |
+ "_t + cs->dflIndex;\n" |
+ " /* If there are functions for this transition then execute them. */\n" |
+ " if ( trans->funcs != 0 )\n" |
+ " ExecFuncs( trans->funcs, p );\n" |
+ " /* Move to the new state. */\n" |
+ " cs = trans->toState;\n" |
+ "finished:\n" |
+ " curState = cs;\n" |
+ "::Finish( )\n" |
+ " State *cs = curState;\n" |
+ " if ( cs != 0 && cs->isFinState ) {\n" |
+ " /* If finishing in a final state then execute the\n" |
+ " * out functions for it. (if any). */\n" |
+ " if ( cs->outFuncs != 0 )\n" |
+ " ExecFuncs( cs->outFuncs, 0 );\n" |
+ " accept = 1;\n" |
+ " else {\n" |
+ " /* If we are not in a final state then this\n" |
+ " * is an error. Move to the error state. */\n" |
+ " curState = 0;\n" |
+ "10TabCodeGen\n" |
+ "11CTabCodeGen\n" |
+ "12CCTabCodeGen\n" |
+ "10FsmCodeGen\n" |
+ "bad_alloc\n" |
+ " case \n" |
+ " break;\n" |
+ "/* Forward dec state for the transition structure. */\n" |
+ "struct \n" |
+ "StateStruct;\n" |
+ "/* A single transition. */\n" |
+ "struct \n" |
+ "TransStruct\n" |
+ " struct \n" |
+ "StateStruct *toState;\n" |
+ " int funcs;\n" |
+ "typedef struct \n" |
+ "TransStruct \n" |
+ "Trans;\n" |
+ "/* A single state. */\n" |
+ "struct \n" |
+ "StateStruct\n" |
+ " int lowIndex;\n" |
+ " int highIndex;\n" |
+ " void *transIndex;\n" |
+ " int dflIndex;\n" |
+ " int outFuncs;\n" |
+ " int isFinState;\n" |
+ "typedef struct \n" |
+ "StateStruct \n" |
+ "State;\n" |
+ "/* Only non-static data: current state. */\n" |
+ "struct \n" |
+ "Struct\n" |
+ "State *curState;\n" |
+ " int accept;\n" |
+ "typedef struct \n" |
+ "Struct \n" |
+ "/* Init the fsm. */\n" |
+ "void \n" |
+ "Init( \n" |
+ " *fsm );\n" |
+ "/* Execute some chunk of data. */\n" |
+ "void \n" |
+ "Execute( \n" |
+ " *fsm, char *data, int dlen );\n" |
+ "/* Indicate to the fsm tha there is no more data. */\n" |
+ "void \n" |
+ "Finish( \n" |
+ " *fsm );\n" |
+ "/* Did the machine accept? */\n" |
+ "int \n" |
+ "Accept( \n" |
+ " *fsm );\n" |
+ "#define s \n" |
+ "#define i \n" |
+ "#define t \n" |
+ "/* The array of indicies into the transition array. */\n" |
+ "#if \n" |
+ "static \n" |
+ "_i[] = {\n" |
+ "#endif\n" |
+ "/* The aray of states. */\n" |
+ "static \n" |
+ "State \n" |
+ "_s[] = {\n" |
+ "/* The array of trainsitions. */\n" |
+ "static \n" |
+ "Trans \n" |
+ "_t[] = {\n" |
+ "/* The start state. */\n" |
+ "static \n" |
+ "State *\n" |
+ "_startState = s+\n" |
+ "#undef f\n" |
+ "#undef s\n" |
+ "#undef i\n" |
+ "#undef t\n" |
+ "/***************************************************************************\n" |
+ " * Execute functions pointed to by funcs until the null function is found. \n" |
+ "inline static void \n" |
+ "ExecFuncs( \n" |
+ " *fsm, int funcs, char *p )\n" |
+ " switch ( funcs ) {\n" |
+ "/****************************************\n" |
+ "Init\n" |
+ "void \n" |
+ " *fsm )\n" |
+ " fsm->curState = \n" |
+ "_startState;\n" |
+ " fsm->accept = 0;\n" |
+ "/****************************************\n" |
+ "Accept\n" |
+ " * Did the fsm accept? \n" |
+ "int \n" |
+ " *fsm )\n" |
+ " return fsm->accept;\n" |
+ "/**********************************************************************\n" |
+ " * Execute the fsm on some chunk of data. \n" |
+ "void \n" |
+ " *fsm, char *data, int dlen )\n" |
+ " char *p = data;\n" |
+ " int len = dlen;\n" |
+ "State *cs = fsm->curState;\n" |
+ " for ( ; len > 0; p++, len-- ) {\n" |
+ " int c = (unsigned char)*p;\n" |
+ "Trans *trans;\n" |
+ " if ( cs == 0 )\n" |
+ " goto finished;\n" |
+ " /* If the character is within the index bounds then get the\n" |
+ " * transition for it. If it is out of the transition bounds\n" |
+ " * we will use the default transition. */\n" |
+ " if ( cs->lowIndex <= c && c < cs->highIndex ) {\n" |
+ " /* Use the index to look into the transition array. */\n" |
+ " trans = \n" |
+ "_t + \n" |
+ " ((\n" |
+ "*)cs->transIndex)[c - cs->lowIndex];\n" |
+ " else {\n" |
+ " /* Use the default index as the char is out of range. */\n" |
+ " trans = \n" |
+ "_t + cs->dflIndex;\n" |
+ " /* If there are functions for this transition then execute them. */\n" |
+ " if ( trans->funcs >= 0 )\n" |
+ "ExecFuncs( fsm, trans->funcs, p );\n" |
+ " /* Move to the new state. */\n" |
+ " cs = trans->toState;\n" |
+ "finished:\n" |
+ " fsm->curState = cs;\n" |
+ "/**********************************************************************\n" |
+ "Finish\n" |
+ " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" |
+ "void \n" |
+ " *fsm )\n" |
+ "State *cs = fsm->curState;\n" |
+ " if ( cs != 0 && cs->isFinState ) {\n" |
+ " /* If finishing in a final state then execute the\n" |
+ " * out functions for it. (if any). */\n" |
+ " if ( cs->outFuncs != 0 )\n" |
+ "ExecFuncs( fsm, cs->outFuncs, 0 );\n" |
+ " fsm->accept = 1;\n" |
+ " else {\n" |
+ " /* If we are not in a final state then this\n" |
+ " * is an error. Move to the error state. */\n" |
+ " fsm->curState = 0;\n" |
+ "class \n" |
+ "public:\n" |
+ " /* Function and index type. */\n" |
+ " typedef int Func;\n" |
+ " /* Forward dec state for the transition structure. */\n" |
+ " struct State;\n" |
+ " /* A single transition. */\n" |
+ " struct Trans\n" |
+ " State *toState;\n" |
+ " int funcs;\n" |
+ " /* A single state. */\n" |
+ " struct State\n" |
+ " int lowIndex;\n" |
+ " int highIndex;\n" |
+ " void *transIndex;\n" |
+ " int dflIndex;\n" |
+ " int outFuncs;\n" |
+ " int isFinState;\n" |
+ " /* Constructor. */\n" |
+ " void Init( );\n" |
+ " /* Execute some chunk of data. */\n" |
+ " void Execute( char *data, int dlen );\n" |
+ " /* Indicate to the fsm tha there is no more data. */\n" |
+ " void Finish( );\n" |
+ " /* Did the machine accept? */\n" |
+ " int Accept( );\n" |
+ " State *curState;\n" |
+ " int accept;\n" |
+ " inline void ExecFuncs( int funcs, char *p );\n" |
+ "::State \n" |
+ "::Trans \n" |
+ "::State *\n" |
+ "/***************************************************************************\n" |
+ " * Execute functions pointed to by funcs until the null function is found. \n" |
+ "inline void \n" |
+ "::ExecFuncs( int funcs, char *p )\n" |
+ " switch ( funcs ) {\n" |
+ "/****************************************\n" |
+ " * Constructor\n" |
+ " Init();\n" |
+ "/****************************************\n" |
+ "::Init( )\n" |
+ " curState = \n" |
+ "_startState;\n" |
+ " accept = 0;\n" |
+ "/****************************************\n" |
+ " * Did the fsm accept? \n" |
+ "int \n" |
+ "::Accept( )\n" |
+ " return accept;\n" |
+ "/**********************************************************************\n" |
+ " * Execute the fsm on some chunk of data. \n" |
+ "void \n" |
+ "::Execute( char *data, int dlen )\n" |
+ " char *p = data;\n" |
+ " int len = dlen;\n" |
+ " State *cs = curState;\n" |
+ " for ( ; len > 0; p++, len-- ) {\n" |
+ " int c = (unsigned char)*p;\n" |
+ " Trans *trans;\n" |
+ " if ( cs == 0 )\n" |
+ " goto finished;\n" |
+ " /* If the character is within the index bounds then get the\n" |
+ " * transition for it. If it is out of the transition bounds\n" |
+ " * we will use the default transition. */\n" |
+ " if ( cs->lowIndex <= c && c < cs->highIndex ) {\n" |
+ " /* Use the index to look into the transition array. */\n" |
+ " trans = \n" |
+ "_t + cs->dflIndex;\n" |
+ " /* If there are functions for this transition then execute them. */\n" |
+ " if ( trans->funcs != 0 )\n" |
+ " ExecFuncs( trans->funcs, p );\n" |
+ " /* Move to the new state. */\n" |
+ " cs = trans->toState;\n" |
+ "finished:\n" |
+ " curState = cs;\n" |
+ "/**********************************************************************\n" |
+ " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" |
+ "void \n" |
+ "::Finish( )\n" |
+ " State *cs = curState;\n" |
+ " if ( cs != 0 && cs->isFinState ) {\n" |
+ " /* If finishing in a final state then execute the\n" |
+ " * out functions for it. (if any). */\n" |
+ " if ( cs->outFuncs != 0 )\n" |
+ " ExecFuncs( cs->outFuncs, 0 );\n" |
+ " accept = 1;\n" |
+ " else {\n" |
+ " /* If we are not in a final state then this\n" |
+ " * is an error. Move to the error state. */\n" |
+ " curState = 0;\n" |
+ "11FTabCodeGen\n" |
+ "12CFTabCodeGen\n" |
+ "13CCFTabCodeGen\n" |
+ "bad_alloc\n" |
+ "cs = -1; \n" |
+ "cs = \n" |
+ "break;\n" |
+ " switch( cs ) {\n" |
+ " case \n" |
+ " switch ( c ) {\n" |
+ "case \n" |
+ "default: \n" |
+ " }\n" |
+ " break;\n" |
+ " switch( cs ) {\n" |
+ "accept = 1; \n" |
+ "/* Only non-static data: current state. */\n" |
+ "struct \n" |
+ "Struct\n" |
+ " int curState;\n" |
+ " int accept;\n" |
+ "typedef struct \n" |
+ "Struct \n" |
+ "/* Init the fsm. */\n" |
+ "void \n" |
+ "Init( \n" |
+ " *fsm );\n" |
+ "/* Execute some chunk of data. */\n" |
+ "void \n" |
+ "Execute( \n" |
+ " *fsm, char *data, int dlen );\n" |
+ "/* Indicate to the fsm tha there is no more data. */\n" |
+ "void \n" |
+ "Finish( \n" |
+ " *fsm );\n" |
+ "/* Did the machine accept? */\n" |
+ "int \n" |
+ "Accept( \n" |
+ " *fsm );\n" |
+ "/* The start state. */\n" |
+ "static int \n" |
+ "_startState = \n" |
+ "/****************************************\n" |
+ "Init\n" |
+ "void \n" |
+ " *fsm )\n" |
+ " fsm->curState = \n" |
+ "_startState;\n" |
+ " fsm->accept = 0;\n" |
+ "/**********************************************************************\n" |
+ " * Execute the fsm on some chunk of data. \n" |
+ "void \n" |
+ " *fsm, char *data, int dlen )\n" |
+ " char *p = data;\n" |
+ " int len = dlen;\n" |
+ " int cs = fsm->curState;\n" |
+ " for ( ; len > 0; p++, len-- ) {\n" |
+ " unsigned char c = (unsigned char)*p;\n" |
+ " fsm->curState = cs;\n" |
+ "/**********************************************************************\n" |
+ "Finish\n" |
+ " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" |
+ "void \n" |
+ " *fsm )\n" |
+ " int cs = fsm->curState;\n" |
+ " int accept = 0;\n" |
+ " fsm->accept = accept;\n" |
+ "/*******************************************************\n" |
+ "Accept\n" |
+ " * Did the machine accept?\n" |
+ "int \n" |
+ " *fsm )\n" |
+ " return fsm->accept;\n" |
+ "/* Only non-static data: current state. */\n" |
+ "class \n" |
+ "public:\n" |
+ " /* Init the fsm. */\n" |
+ " void Init( );\n" |
+ " /* Execute some chunk of data. */\n" |
+ " void Execute( char *data, int dlen );\n" |
+ " /* Indicate to the fsm tha there is no more data. */\n" |
+ " void Finish( );\n" |
+ " /* Did the machine accept? */\n" |
+ " int Accept( );\n" |
+ " int curState;\n" |
+ " int accept;\n" |
+ " /* The start state. */\n" |
+ " static int startState;\n" |
+ "/* The start state. */\n" |
+ "int \n" |
+ "::startState = \n" |
+ " Init();\n" |
+ "/****************************************\n" |
+ "::Init\n" |
+ "void \n" |
+ "::Init( )\n" |
+ " curState = startState;\n" |
+ " accept = 0;\n" |
+ "::Execute( char *data, int dlen )\n" |
+ " char *p = data;\n" |
+ " int len = dlen;\n" |
+ " int cs = curState;\n" |
+ " for ( ; len > 0; p++, len-- ) {\n" |
+ " unsigned char c = (unsigned char)*p;\n" |
+ " curState = cs;\n" |
+ "/**********************************************************************\n" |
+ "::Finish\n" |
+ " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" |
+ "void \n" |
+ "::Finish( )\n" |
+ " int cs = curState;\n" |
+ " int accept = 0;\n" |
+ " this->accept = accept;\n" |
+ "/*******************************************************\n" |
+ "::Accept\n" |
+ " * Did the machine accept?\n" |
+ "int \n" |
+ "::Accept( )\n" |
+ " return accept;\n" |
+ "10SelCodeGen\n" |
+ "11CSelCodeGen\n" |
+ "12CCSelCodeGen\n" |
+ "10FsmCodeGen\n" |
+ "bad_alloc\n" |
+ "goto tr\n" |
+ "goto st\n" |
+ "goto err;\n" |
+ " case \n" |
+ "break;}\n" |
+ ": goto st\n" |
+ " case \n" |
+ " default: return;\n" |
+ " goto st\n" |
+ " if ( --len == 0 )\n" |
+ " goto out\n" |
+ " switch( (alph) *++p ) {\n" |
+ "case \n" |
+ " default: \n" |
+ " return;\n" |
+ "curState = \n" |
+ " switch( cs ) {\n" |
+ "accept = 1; \n" |
+ "break;\n" |
+ "err:\n" |
+ "curState = -1;\n" |
+ ", p );\n" |
+ "ExecFuncs( fsm, f+\n" |
+ "fsm->\n" |
+ "/* Only non-static data: current state. */\n" |
+ "struct \n" |
+ "Struct\n" |
+ " int curState;\n" |
+ " int accept;\n" |
+ "typedef struct \n" |
+ "Struct \n" |
+ "/* Init the fsm. */\n" |
+ "void \n" |
+ "Init( \n" |
+ " *fsm );\n" |
+ "/* Execute some chunk of data. */\n" |
+ "void \n" |
+ "Execute( \n" |
+ " *fsm, char *data, int dlen );\n" |
+ "/* Indicate to the fsm tha there is no more data. */\n" |
+ "void \n" |
+ "Finish( \n" |
+ " *fsm );\n" |
+ "/* Did the machine accept? */\n" |
+ "int \n" |
+ "Accept( \n" |
+ " *fsm );\n" |
+ "/* The start state. */\n" |
+ "static int \n" |
+ "_startState = \n" |
+ "#define f \n" |
+ "#define alph unsigned char\n" |
+ "/* The array of functions. */\n" |
+ "#if \n" |
+ "static int \n" |
+ "_f[] = {\n" |
+ "#endif\n" |
+ "/****************************************\n" |
+ "Init\n" |
+ "void \n" |
+ " *fsm )\n" |
+ " fsm->curState = \n" |
+ "_startState;\n" |
+ " fsm->accept = 0;\n" |
+ "/***************************************************************************\n" |
+ " * Function exection. We do not inline this as in tab\n" |
+ " * code gen because if we did, we might as well just expand \n" |
+ " * the function as in the faster goto code generator.\n" |
+ "static void \n" |
+ "ExecFuncs( \n" |
+ " *fsm, int *funcs, char *p )\n" |
+ " int len = *funcs++;\n" |
+ " while ( len-- > 0 ) {\n" |
+ " switch ( *funcs++ ) {\n" |
+ "/**********************************************************************\n" |
+ " * Execute the fsm on some chunk of data. \n" |
+ "void \n" |
+ " *fsm, char *data, int dlen )\n" |
+ " /* Prime these to one back to simulate entering the \n" |
+ " * machine on a transition. */ \n" |
+ " register char *p = data - 1;\n" |
+ " register int len = dlen + 1;\n" |
+ " /* Switch statment to enter the machine. */\n" |
+ " switch ( \n" |
+ "curState ) {\n" |
+ "/**********************************************************************\n" |
+ " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" |
+ "void \n" |
+ " *fsm )\n" |
+ " int cs = fsm->curState;\n" |
+ " int accept = 0;\n" |
+ " fsm->accept = accept;\n" |
+ "/*******************************************************\n" |
+ " * Did the machine accept?\n" |
+ "int \n" |
+ " *fsm )\n" |
+ " return fsm->accept;\n" |
+ "#undef f\n" |
+ "#undef alph\n" |
+ " ExecFuncs( f+\n" |
+ "/* Only non-static data: current state. */\n" |
+ "class \n" |
+ "public:\n" |
+ " /* Init the fsm. */\n" |
+ " void Init( );\n" |
+ " /* Execute some chunk of data. */\n" |
+ " void Execute( char *data, int dlen );\n" |
+ " /* Indicate to the fsm tha there is no more data. */\n" |
+ " void Finish( );\n" |
+ " /* Did the machine accept? */\n" |
+ " int Accept( );\n" |
+ " int curState;\n" |
+ " int accept;\n" |
+ " /* The start state. */\n" |
+ " static int startState;\n" |
+ " /* Function exection. We do not inline this as in tab code gen\n" |
+ " * because if we did, we might as well just expand the function \n" |
+ " * as in the faster goto code generator. */\n" |
+ " void ExecFuncs( int *funcs, char * );\n" |
+ "/* The start state. */\n" |
+ "int \n" |
+ "::startState = \n" |
+ "/* some defines to lessen the code size. */\n" |
+ "#define f \n" |
+ "#endif\n" |
+ "/****************************************\n" |
+ " * Make sure the fsm is initted.\n" |
+ " Init();\n" |
+ "/****************************************\n" |
+ " * Initialize the fsm.\n" |
+ "void \n" |
+ "::Init( )\n" |
+ " curState = startState;\n" |
+ " accept = 0;\n" |
+ "/***************************************************************************\n" |
+ " * Execute functions pointed to by funcs until the null function is found. \n" |
+ "void \n" |
+ "::ExecFuncs( int *funcs, char *p )\n" |
+ " int len = *funcs++;\n" |
+ " while ( len-- > 0 ) {\n" |
+ " switch ( *funcs++ ) {\n" |
+ "/**********************************************************************\n" |
+ " * Execute the fsm on some chunk of data. \n" |
+ "void \n" |
+ "::Execute( char *data, int dlen )\n" |
+ " /* Prime these to one back to simulate entering the \n" |
+ " * machine on a transition. */ \n" |
+ " register char *p = data - 1;\n" |
+ " register int len = dlen + 1;\n" |
+ " /* Switch statment to enter the machine. */\n" |
+ " switch ( curState ) {\n" |
+ "/**********************************************************************\n" |
+ "::Finish\n" |
+ " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" |
+ "void \n" |
+ "::Finish( )\n" |
+ " int cs = curState;\n" |
+ " int accept = 0;\n" |
+ " this->accept = accept;\n" |
+ "/*******************************************************\n" |
+ "::Accept\n" |
+ " * Did the machine accept?\n" |
+ "int \n" |
+ "::Accept( )\n" |
+ " return accept;\n" |
+ "#undef f\n" |
+ "#undef alph\n" |
+ "11GotoCodeGen\n" |
+ "12CGotoCodeGen\n" |
+ "13CCGotoCodeGen\n" |
+ "10FsmCodeGen\n" |
+ "bad_alloc\n" |
+ " case \n" |
+ " break;\n" |
+ ", p );\n" |
+ "ExecFuncs( fsm, \n" |
+ "fsm->\n" |
+ "/* Only non-static data: current state. */\n" |
+ "struct \n" |
+ "Struct\n" |
+ " int curState;\n" |
+ " int accept;\n" |
+ "typedef struct \n" |
+ "Struct \n" |
+ "/* Init the fsm. */\n" |
+ "void \n" |
+ "Init( \n" |
+ " *fsm );\n" |
+ "/* Execute some chunk of data. */\n" |
+ "void \n" |
+ "Execute( \n" |
+ " *fsm, char *data, int dlen );\n" |
+ "/* Indicate to the fsm tha there is no more data. */\n" |
+ "void \n" |
+ "Finish( \n" |
+ " *fsm );\n" |
+ "/* Did the machine accept? */\n" |
+ "int \n" |
+ "Accept( \n" |
+ " *fsm );\n" |
+ "/* The start state. */\n" |
+ "static int \n" |
+ "_startState = \n" |
+ "/****************************************\n" |
+ "Init\n" |
+ "void \n" |
+ " *fsm )\n" |
+ " fsm->curState = \n" |
+ "_startState;\n" |
+ " fsm->accept = 0;\n" |
+ "/***************************************************************************\n" |
+ " * Function exection. We do not inline this as in tab\n" |
+ " * code gen because if we did, we might as well just expand \n" |
+ " * the function as in the faster goto code generator.\n" |
+ "static void \n" |
+ "ExecFuncs( \n" |
+ " *fsm, int func, char *p )\n" |
+ " switch ( func ) {\n" |
+ "#define alph unsigned char\n" |
+ "/**********************************************************************\n" |
+ " * Execute the fsm on some chunk of data. \n" |
+ "void \n" |
+ " *fsm, char *data, int dlen )\n" |
+ " /* Prime these to one back to simulate entering the \n" |
+ " * machine on a transition. */ \n" |
+ " register char *p = data-1;\n" |
+ " register int len = dlen+1;\n" |
+ " /* Switch statment to enter the machine. */\n" |
+ " switch ( \n" |
+ "curState ) {\n" |
+ "/**********************************************************************\n" |
+ "Finish\n" |
+ " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" |
+ "void \n" |
+ " *fsm )\n" |
+ " int cs = fsm->curState;\n" |
+ " int accept = 0;\n" |
+ " fsm->accept = accept;\n" |
+ "/*******************************************************\n" |
+ "Accept\n" |
+ " * Did the machine accept?\n" |
+ "int \n" |
+ " *fsm )\n" |
+ " return fsm->accept;\n" |
+ "#undef alph\n" |
+ " ExecFuncs( \n" |
+ "/* Only non-static data: current state. */\n" |
+ "class \n" |
+ "public:\n" |
+ " /* Init the fsm. */\n" |
+ " void Init( );\n" |
+ " /* Execute some chunk of data. */\n" |
+ " void Execute( char *data, int dlen );\n" |
+ " /* Indicate to the fsm tha there is no more data. */\n" |
+ " void Finish( );\n" |
+ " /* Did the machine accept? */\n" |
+ " int Accept( );\n" |
+ " int curState;\n" |
+ " int accept;\n" |
+ " /* The start state. */\n" |
+ " static int startState;\n" |
+ " /* Function exection. We do not inline this as in tab code gen\n" |
+ " * because if we did, we might as well just expand the function \n" |
+ " * as in the faster goto code generator. */\n" |
+ " void ExecFuncs( int func, char *p );\n" |
+ "/* The start state. */\n" |
+ "int \n" |
+ "::startState = \n" |
+ " Init();\n" |
+ "/****************************************\n" |
+ "::Init\n" |
+ "void \n" |
+ "::Init( )\n" |
+ " curState = startState;\n" |
+ " accept = 0;\n" |
+ "/***************************************************************************\n" |
+ " * Execute functions pointed to by funcs until the null function is found. \n" |
+ "void \n" |
+ "::ExecFuncs( int func, char *p )\n" |
+ " switch ( func ) {\n" |
+ "::Execute( char *data, int dlen )\n" |
+ " /* Prime these to one back to simulate entering the \n" |
+ " * machine on a transition. */ \n" |
+ " register char *p = data-1;\n" |
+ " register int len = dlen+1;\n" |
+ " /* Switch statment to enter the machine. */\n" |
+ " switch ( curState ) {\n" |
+ "::Finish\n" |
+ " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" |
+ "void \n" |
+ "::Finish( )\n" |
+ " int cs = curState;\n" |
+ " int accept = 0;\n" |
+ " this->accept = accept;\n" |
+ "/*******************************************************\n" |
+ "::Accept\n" |
+ " * Did the machine accept?\n" |
+ "int \n" |
+ "::Accept( )\n" |
+ " return accept;\n" |
+ "#undef alph\n" |
+ "12FGotoCodeGen\n" |
+ "13CFGotoCodeGen\n" |
+ "14CCFGotoCodeGen\n" |
+ "11GotoCodeGen\n" |
+ "10FsmCodeGen\n" |
+ "bad_alloc\n" |
+ "fsm->\n" |
+ "/* Only non-static data: current state. */\n" |
+ "struct \n" |
+ "Struct\n" |
+ " int curState;\n" |
+ " int accept;\n" |
+ "typedef struct \n" |
+ "Struct \n" |
+ "/* Init the fsm. */\n" |
+ "void \n" |
+ "Init( \n" |
+ " *fsm );\n" |
+ "/* Execute some chunk of data. */\n" |
+ "void \n" |
+ "Execute( \n" |
+ " *fsm, char *data, int dlen );\n" |
+ "/* Indicate to the fsm tha there is no more data. */\n" |
+ "void \n" |
+ "Finish( \n" |
+ " *fsm );\n" |
+ "/* Did the machine accept? */\n" |
+ "int \n" |
+ "Accept( \n" |
+ " *fsm );\n" |
+ "/* The start state. */\n" |
+ "static int \n" |
+ "_startState = \n" |
+ "/****************************************\n" |
+ "Init\n" |
+ "void \n" |
+ " *fsm )\n" |
+ " fsm->curState = \n" |
+ "_startState;\n" |
+ " fsm->accept = 0;\n" |
+ "#define alph unsigned char\n" |
+ "/**********************************************************************\n" |
+ " * Execute the fsm on some chunk of data. \n" |
+ "void \n" |
+ " *fsm, char *data, int dlen )\n" |
+ " /* Prime these to one back to simulate entering the \n" |
+ " * machine on a transition. */ \n" |
+ " register char *p = data-1;\n" |
+ " register int len = dlen+1;\n" |
+ " /* Switch statment to enter the machine. */\n" |
+ " switch ( \n" |
+ "curState ) {\n" |
+ "/**********************************************************************\n" |
+ "Finish\n" |
+ " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" |
+ "void \n" |
+ " *fsm )\n" |
+ " int cs = fsm->curState;\n" |
+ " int accept = 0;\n" |
+ " fsm->accept = accept;\n" |
+ "/*******************************************************\n" |
+ "Accept\n" |
+ " * Did the machine accept?\n" |
+ "int \n" |
+ " *fsm )\n" |
+ " return fsm->accept;\n" |
+ "#undef alph\n" |
+ "/* Only non-static data: current state. */\n" |
+ "class \n" |
+ "public:\n" |
+ " /* Init the fsm. */\n" |
+ " void Init( );\n" |
+ " /* Execute some chunk of data. */\n" |
+ " void Execute( char *data, int dlen );\n" |
+ " /* Indicate to the fsm tha there is no more data. */\n" |
+ " void Finish( );\n" |
+ " /* Did the machine accept? */\n" |
+ " int Accept( );\n" |
+ " int curState;\n" |
+ " int accept;\n" |
+ " /* The start state. */\n" |
+ " static int startState;\n" |
+ "/* The start state. */\n" |
+ "int \n" |
+ "::startState = \n" |
+ " Init();\n" |
+ "/****************************************\n" |
+ "::Init\n" |
+ "void \n" |
+ "::Init( )\n" |
+ " curState = startState;\n" |
+ " accept = 0;\n" |
+ "#define alph unsigned char\n" |
+ "/**********************************************************************\n" |
+ " * Execute the fsm on some chunk of data. \n" |
+ "void \n" |
+ "::Execute( char *data, int dlen )\n" |
+ " /* Prime these to one back to simulate entering the \n" |
+ " * machine on a transition. */ \n" |
+ " register char *p = data-1;\n" |
+ " register int len = dlen+1;\n" |
+ " /* Switch statment to enter the machine. */\n" |
+ " switch ( curState ) {\n" |
+ "::Finish\n" |
+ " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" |
+ "void \n" |
+ "::Finish( )\n" |
+ " int cs = curState;\n" |
+ " int accept = 0;\n" |
+ " this->accept = accept;\n" |
+ "/*******************************************************\n" |
+ "::Accept\n" |
+ " * Did the machine accept?\n" |
+ "int \n" |
+ "::Accept( )\n" |
+ " return accept;\n" |
+ "#undef alph\n" |
+ "13IpGotoCodeGen\n" |
+ "14CIpGotoCodeGen\n" |
+ "15CCIpGotoCodeGen\n" |
+ "11GotoCodeGen\n" |
+ "10FsmCodeGen\n";
+}%%
+
+%% write data;
+struct strs the_fsm;
+
+void test( char *buf ) /* run the strs machine over buf and print ACCEPT or FAIL */
+{
+ struct strs *fsm = &the_fsm; /* the single file-scope machine instance */
+ char *p = buf; /* first input byte */
+ char *pe = buf + strlen( buf ); /* one past the last input byte */
+
+ %% write init;
+ %% write exec;
+
+ %% write eof;
+
+ if ( fsm->cs >= strs_first_final ) /* ended in a state at or above strs_first_final */
+ printf("ACCEPT\n");
+ else
+ printf("FAIL\n");
+}
+
+
+int main() /* expected results below are taken from the _____OUTPUT_____ section */
+{
+ test( "stdin\n" ); /* expected: ACCEPT */
+ test( "bad_alloc\n" ); /* expected: ACCEPT */
+ test( "_GLOBAL_OFFSET_TABLE_\n" ); /* expected: ACCEPT */
+ test( "not in\n" ); /* expected: FAIL */
+ test(
+ "isatty\n"
+ "junk on end.\n"
+ ); /* expected: FAIL */
+
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+ACCEPT
+ACCEPT
+ACCEPT
+FAIL
+FAIL
+#endif
diff --git a/test/testcase.txl b/test/testcase.txl
new file mode 100644
index 0000000..cd02bb8
--- /dev/null
+++ b/test/testcase.txl
@@ -0,0 +1,177 @@
+comments
+ '#
+end comments
+
+tokens
+ union "\[[(\\\c)#\]]*\]"
+end tokens
+
+compounds
+ '%% '%%{ '}%% '== ':= '-> '<> '>= '<= '=>
+ '|* '*|
+ '>! '<! '$! '%! '@! '<>!
+ '>/ '</ '$/ '%/ '@/ '<>/
+end compounds
+
+keys
+ 'int 'bool 'true 'false 'char 'ptr
+ 'if 'else 'printi 'prints
+ 'fc 'fpc 'fbreak 'fgoto 'fcall 'fret 'fhold 'fexec
+end keys
+
+define lang_indep
+ [al_statements]
+ '%% [NL]
+ [al_statements]
+ [ragel_def]
+end define
+
+define ragel_def
+ '%%{ [NL] [IN]
+ [ragel_program]
+ [EX] '}%% [NL]
+end define
+
+define ragel_program
+ [repeat statement]
+end define
+
+define statement
+ [machine_stmt]
+ | [action_stmt]
+ | [cond_action_stmt]
+ | [machine_def]
+ | [machine_inst]
+end define
+
+define machine_stmt
+ 'machine [id] '; [NL]
+end define
+
+define action_stmt
+ 'action [id] [al_host_block]
+end define
+
+define cond_action_stmt
+ 'action [id] '{ [al_expr] '} [NL]
+end define
+
+define al_statements
+ [repeat action_lang_stmt]
+end define
+
+define action_lang_stmt
+ [al_ragel_stmt]
+ | [al_variable_decl]
+ | [al_expr_stmt]
+ | [al_if_stmt]
+ | [al_print_stmt]
+ | '{ [al_statements] '}
+end define
+
+define al_print_stmt
+ [print_cmd] [al_expr] '; [NL]
+end define
+
+define print_cmd
+ 'printi | 'prints
+end define
+
+define al_variable_decl
+ [al_type_decl] [id] [opt union] '; [NL]
+end define
+
+define al_array_decl
+ '[ [number] ']
+end define
+
+define al_type_decl
+ 'int | 'bool | 'char | 'ptr
+end define
+
+define al_expr_stmt
+ [al_expr] '; [NL]
+end define
+
+define al_expr
+ [al_term] [repeat al_expr_extend]
+end define
+
+define al_expr_extend
+ [al_expr_op] [al_term]
+end define
+
+define al_expr_op
+ '= | '+ | '- | '* | '/ | '== | '<= | '>=
+end define
+
+define al_term
+ [id]
+ | [opt al_sign] [number]
+ | [stringlit]
+ | [charlit]
+ | 'fc
+ | 'true
+ | 'false
+ | '( [al_expr] ')
+end define
+
+define al_sign
+ '- | '+
+end define
+
+define al_if_stmt
+ 'if '( [al_expr] ') [NL] [IN]
+ [action_lang_stmt] [EX]
+ [opt al_else]
+end define
+
+define al_else
+ 'else [NL] [IN]
+ [action_lang_stmt] [EX]
+end define
+
+define al_ragel_stmt
+ 'fbreak '; [NL]
+ | 'fhold '; [NL]
+ | 'fexec [repeat al_expr] '; [NL]
+ | 'fnext [id] '; [NL]
+ | 'fgoto [id] '; [NL]
+ | 'fcall [id] '; [NL]
+ | 'fnext '* [repeat al_expr] '; [NL]
+ | 'fgoto '* [repeat al_expr] '; [NL]
+ | 'fcall '* [repeat al_expr] '; [NL]
+ | 'fret '; [NL]
+end define
+
+define machine_def
+ [id] '= [machine_expr] '; [NL]
+end define
+
+define machine_inst
+ [id] ':= [machine_expr] '; [NL]
+end define
+
+define machine_expr
+ [repeat machine_expr_item]
+end define
+
+define scanner_item
+ [repeat machine_expr_item] '; [NL]
+end define
+
+define machine_expr_item
+ [action_embed] [al_host_block]
+ | '|* [repeat scanner_item] '*|
+ | [not ';] [not '*|] [token]
+end define
+
+define al_host_block
+ '{ [NL] [IN] [al_statements] [EX] '} [NL]
+end define
+
+define action_embed
+ '> | '$ | '@ | '% |
+ '$! | '=>
+end define
+
diff --git a/test/tokstart1.rl b/test/tokstart1.rl
new file mode 100644
index 0000000..b6df225
--- /dev/null
+++ b/test/tokstart1.rl
@@ -0,0 +1,241 @@
+/*
+ * @LANG: c++
+ */
+
+#include <iostream>
+#include <string.h>
+using namespace std;
+
+extern char buf[];
+
+struct Scanner
+{
+ int cs, act;
+ char *tokstart, *tokend;
+
+ // Initialize the machine by running the Ragel-generated init code
+ // (the "write init" statement in the definition below). Returns
+ // nothing.
+ void init( );
+
+ // Execute the machine on a block of data. If a token is still in
+ // progress afterwards (tokstart is non-null), its bytes are moved to the
+ // front of data and their count is returned; otherwise returns 0.
+ // Callers detect failure by comparing cs against Scanner_error.
+ int execute( char *data, int len );
+
+ // Indicate that there is no more data. Returns -1 if the machine finishes
+ // in the error state and does not accept, 0 if the machine finishes
+ // in any other non-accepting state and 1 if the machine finishes in an
+ // accepting state.
+ int finish( );
+};
+
+%%{
+ machine Scanner;
+
+ action to_act {
+ cout << "to: fc = ";
+ if ( fc == '\'' )
+ cout << (int)fc;
+ else
+ cout << fc;
+ cout << " tokstart = " << ( tokstart == 0 ? -1 : tokstart-buf ) << endl;
+ }
+ action from_act {
+ cout << "from: fc = ";
+ if ( fc == '\'' )
+ cout << (int)fc;
+ else
+ cout << fc;
+ cout << " tokstart = " << ( tokstart == 0 ? -1 : tokstart-buf ) << endl;
+ }
+
+ c_comm := ( any* $0 '*/' @1 @{ fgoto main; } ) $~to_act $*from_act;
+ cxx_comm := ( any* $0 '\n' @1 @{ fgoto main; } ) $~to_act $*from_act;
+
+ main := |*
+ # Every pattern embeds to_act ($~, to-state) and from_act ($*, from-state) in all of its states.
+ # Single and double literals.
+ ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) $~ to_act $* from_act;
+ ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) $~ to_act $* from_act;
+
+ # Identifiers
+ ( [a-zA-Z_] [a-zA-Z0-9_]* ) $~ to_act $* from_act;
+
+ # Floating literals.
+ fract_const = digit* '.' digit+ | digit+ '.';
+ exponent = [eE] [+\-]? digit+;
+ float_suffix = [flFL];
+
+ ( fract_const exponent? float_suffix? |
+ digit+ exponent float_suffix? ) $~ to_act $* from_act;
+
+ # Integer decimal. Leading part buffered by float.
+ ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) $~ to_act $* from_act;
+
+ # Integer octal. Leading part buffered by float.
+ ( '0' [0-9]+ [ulUL]{0,2} ) $~ to_act $* from_act;
+
+ # Integer hex. Leading 0 buffered by float.
+ ( '0x' [0-9a-fA-F]+ [ulUL]{0,2} ) $~ to_act $* from_act;
+
+ # Three char compounds, first item already buffered.
+ ( '...' ) $~ to_act $* from_act;
+
+ # Single char symbols.
+ ( punct - [_"'] ) $~ to_act $* from_act;
+
+ # Comments and whitespace.
+ ( '/*' ) $~ to_act $* from_act { fgoto c_comm; };
+ ( '//' ) $~ to_act $* from_act { fgoto cxx_comm; };
+
+ ( any - 33..126 )+ $~ to_act $* from_act;
+
+ *|;
+}%%
+
+%% write data;
+
+void Scanner::init( )
+{
+ %% write init;
+}
+
+int Scanner::execute( char *data, int len )
+{
+ char *p = data; // first byte of this block
+ char *pe = data + len; // one past the last byte of this block
+
+ %% write exec;
+
+ int have = 0; // bytes carried over for the caller; stays 0 when tokstart is null
+ if ( tokstart != 0 ) { // presumably a token was still open when the block ended
+ have = pe - tokstart; // bytes from tokstart up to the end of the block
+ memmove( data, tokstart, have ); // shift them to the front of data
+ }
+ return have;
+}
+
+int Scanner::finish( )
+{
+ %% write eof;
+ // -1: error state, 1: final (accepting) state, 0: anything else.
+ int result = 0;
+ if ( cs == Scanner_error ) result = -1;
+ else if ( cs >= Scanner_first_final ) result = 1;
+ return result;
+}
+
+void test( ) // scans the global buf once; the trace printed by the actions is the test output
+{
+ int len = strlen( buf );
+ Scanner scanner;
+
+ scanner.init();
+ scanner.execute( buf, len ); // returned carry-over byte count is not used here
+ if ( scanner.cs == Scanner_error ) {
+ /* Machine failed before finding a token. */
+ cout << "PARSE ERROR" << endl;
+ }
+ scanner.finish(); // return value is ignored
+}
+
+char buf[4096];
+
+int main()
+{
+ strcpy( buf,
+ "a b 0.98 /*\n"
+ "9 */'\\''//hi\n"
+ "there\n"
+ );
+ test();
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+from: fc = a tokstart = 0
+to: fc = a tokstart = 0
+from: fc = tokstart = 0
+to: fc = a tokstart = -1
+from: fc = tokstart = 1
+to: fc = tokstart = 1
+from: fc = b tokstart = 1
+to: fc = tokstart = -1
+from: fc = b tokstart = 2
+to: fc = b tokstart = 2
+from: fc = tokstart = 2
+to: fc = b tokstart = -1
+from: fc = tokstart = 3
+to: fc = tokstart = 3
+from: fc = 0 tokstart = 3
+to: fc = tokstart = -1
+from: fc = 0 tokstart = 4
+to: fc = 0 tokstart = 4
+from: fc = . tokstart = 4
+to: fc = . tokstart = 4
+from: fc = 9 tokstart = 4
+to: fc = 9 tokstart = 4
+from: fc = 8 tokstart = 4
+to: fc = 8 tokstart = 4
+from: fc = tokstart = 4
+to: fc = 8 tokstart = -1
+from: fc = tokstart = 8
+to: fc = tokstart = 8
+from: fc = / tokstart = 8
+to: fc = tokstart = -1
+from: fc = / tokstart = 9
+to: fc = / tokstart = 9
+from: fc = * tokstart = 9
+to: fc = * tokstart = -1
+from: fc =
+ tokstart = -1
+to: fc =
+ tokstart = -1
+from: fc = 9 tokstart = -1
+to: fc = 9 tokstart = -1
+from: fc = tokstart = -1
+to: fc = tokstart = -1
+from: fc = * tokstart = -1
+to: fc = * tokstart = -1
+from: fc = / tokstart = -1
+to: fc = / tokstart = -1
+from: fc = 39 tokstart = 16
+to: fc = 39 tokstart = 16
+from: fc = \ tokstart = 16
+to: fc = \ tokstart = 16
+from: fc = 39 tokstart = 16
+to: fc = 39 tokstart = 16
+from: fc = 39 tokstart = 16
+to: fc = 39 tokstart = -1
+from: fc = / tokstart = 20
+to: fc = / tokstart = 20
+from: fc = / tokstart = 20
+to: fc = / tokstart = -1
+from: fc = h tokstart = -1
+to: fc = h tokstart = -1
+from: fc = i tokstart = -1
+to: fc = i tokstart = -1
+from: fc =
+ tokstart = -1
+to: fc =
+ tokstart = -1
+from: fc = t tokstart = 25
+to: fc = t tokstart = 25
+from: fc = h tokstart = 25
+to: fc = h tokstart = 25
+from: fc = e tokstart = 25
+to: fc = e tokstart = 25
+from: fc = r tokstart = 25
+to: fc = r tokstart = 25
+from: fc = e tokstart = 25
+to: fc = e tokstart = 25
+from: fc =
+ tokstart = 25
+to: fc = e tokstart = -1
+from: fc =
+ tokstart = 30
+to: fc =
+ tokstart = 30
+#endif
diff --git a/test/union.rl b/test/union.rl
new file mode 100644
index 0000000..6ad7e9a
--- /dev/null
+++ b/test/union.rl
@@ -0,0 +1,189 @@
+/*
+ * @LANG: c++
+ * Show off concurrent abilities.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+
+using namespace std;
+
+#define BUFSIZE 2048
+
+struct Concurrent
+{
+ int cur_char; // incremented by the next_char action; reset to 0 in init()
+ int start_word; // value of cur_char saved by the start_word action
+ int start_comment; // value of cur_char saved by the start_comment action
+ int start_literal; // value of cur_char saved by the start_literal action
+
+ int cs; // compared with Concurrent_error and Concurrent_first_final in finish()
+
+ // Initialize the machine: resets cur_char to 0 and runs the
+ // Ragel-generated init code (the "write init" statement below).
+ // Returns nothing.
+ void init( );
+
+ // Execute the machine on a block of data by running the Ragel-generated
+ // exec code over [data, data + len). Returns nothing; the test driver
+ // learns the outcome from finish(). Words, comments and literals are
+ // reported on cout by the end_* actions as they are recognized.
+ void execute( const char *data, int len );
+
+ // Indicate that there is no more data. Returns -1 if the machine finishes
+ // in the error state and does not accept, 0 if the machine finishes
+ // in any other non-accepting state and 1 if the machine finishes in an
+ // accepting state.
+ int finish( );
+};
+
+%%{
+ machine Concurrent;
+
+ action next_char {
+ cur_char += 1;
+ }
+
+ action start_word {
+ start_word = cur_char;
+ }
+ action end_word {
+ cout << "word: " << start_word <<
+ " " << cur_char-1 << endl;
+ }
+
+ action start_comment {
+ start_comment = cur_char;
+ }
+ action end_comment {
+ cout << "comment: " << start_comment <<
+ " " << cur_char-1 << endl;
+ }
+
+ action start_literal {
+ start_literal = cur_char;
+ }
+ action end_literal {
+ cout << "literal: " << start_literal <<
+ " " << cur_char-1 << endl;
+ }
+
+ # Count characters.
+ chars = ( any @next_char )*;
+
+ # Words are non-whitespace.
+ word = ( any-space )+ >start_word %end_word;
+ words = ( ( word | space ) $1 %0 )*;
+
+ # Finds C style comments.
+ comment = ( '/*' any* $0 '*/'@1 ) >start_comment %end_comment;
+ comments = ( ( comment | any ) $1 %0 )*;
+
+ # Finds single quoted strings.
+ literalChar = ( any - ['\\] ) | ( '\\' . any );
+ literal = ('\'' literalChar* '\'' ) >start_literal %end_literal;
+ literals = ( ( literal | (any-'\'') ) $1 %0 )*;
+
+ main := chars | words | comments | literals;
+}%%
+
+%% write data;
+
+void Concurrent::init( )
+{
+ cur_char = 0;
+ %% write init;
+}
+
+void Concurrent::execute( const char *data, int len )
+{
+ const char *p = data;
+ const char *pe = data + len;
+
+ %% write exec;
+}
+
+int Concurrent::finish( )
+{
+ %% write eof;
+ // Report how the machine ended: -1 in the error state, 1 in an
+ // accepting (final) state, 0 otherwise.
+ if ( cs == Concurrent_error )
+ return -1;
+ return cs >= Concurrent_first_final ? 1 : 0;
+}
+
+void test( const char *buf ) // const: every caller passes a string literal; buf is only read
+{
+ Concurrent concurrent; // fresh machine for each input
+ concurrent.init();
+ concurrent.execute( buf, strlen(buf) );
+ if ( concurrent.finish() > 0 ) // finish() returns 1 only when cs >= Concurrent_first_final
+ cout << "ACCEPT" << endl;
+ else
+ cout << "FAIL" << endl;
+}
+
+int main()
+{
+ test(
+ "/* in a comment,\n"
+ " * ' and now in a literal string\n"
+ " */ \n"
+ " \n"
+ "the comment has now ended but the literal string lives on\n"
+ "\n"
+ "' comment closed\n" );
+ test( "/* * ' \\' */ \\' '\n" );
+ test( "/**/'\\''/*/*/\n" );
+ return 0;
+}
+
+#ifdef _____OUTPUT_____
+word: 1 2
+word: 4 5
+word: 7 7
+word: 9 16
+word: 19 19
+word: 21 21
+word: 23 25
+word: 27 29
+word: 31 32
+word: 34 34
+word: 36 42
+word: 44 49
+word: 52 53
+comment: 1 53
+word: 58 60
+word: 62 68
+word: 70 72
+word: 74 76
+word: 78 82
+word: 84 86
+word: 88 90
+word: 92 98
+word: 100 105
+word: 107 111
+word: 113 114
+word: 117 117
+literal: 21 117
+word: 119 125
+word: 127 132
+ACCEPT
+word: 1 2
+word: 4 4
+word: 6 6
+word: 8 9
+word: 11 12
+comment: 1 12
+word: 14 15
+word: 17 17
+literal: 6 17
+ACCEPT
+comment: 1 4
+literal: 5 8
+word: 1 13
+comment: 9 13
+ACCEPT
+#endif
diff --git a/test/xml.rl b/test/xml.rl
new file mode 100644
index 0000000..ca13f43
--- /dev/null
+++ b/test/xml.rl
@@ -0,0 +1,108 @@
+/*
+ * XML parser based on the XML 1.0 BNF from:
+ * http://www.jelks.nu/XML/xmlebnf.html
+ *
+ * @LANG: c++
+ * @ALLOW_MINFLAGS: -l -e
+ * @ALLOW_GENFLAGS: -T0 -T1
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+
+using namespace std;
+
+#define BUFSIZE 2048
+
+struct XML // driver state and entry points for the Ragel machine named XML in this file
+{
+ int cur_char; // running character index: zeroed by init(), incremented by action next_char
+ int start_word; // value of cur_char saved by action start_word
+ int start_comment; // not referenced by any code visible in this test
+ int start_literal; // not referenced by any code visible in this test
+
+ int cs, top, stack[1024]; // cs: current state, compared with XML_error / XML_first_final; top, stack: presumably Ragel call-stack storage, unused in the visible code - TODO confirm
+
+ int init( ); // resets the machine and cur_char; returns 1
+ int execute( const unsigned short *data, int len ); // runs the machine over len units; returns -1, 1 or 0
+ int finish( ); // end-of-input step; returns -1, 1 or 0
+};
+
+%%{
+ machine XML;
+ alphtype unsigned short;
+
+ action next_char {
+ ++cur_char; // one more character consumed
+ }
+
+ action start_word {
+ start_word = cur_char; // remember the index at which the word begins
+ }
+ action end_word {
+ // Print the inclusive [first, last] character indices of the word.
+ cout << "word: " << start_word << " " << ( cur_char - 1 ) << endl;
+ }
+
+ Extender = 0x00B7 | 0x02D0 | 0x02D1 | 0x0387 | 0x0640 | 0x0E46 | 0x0EC6 | 0x3005 | (0x3031..0x3035) | (0x309D..0x309E) | (0x30FC..0x30FE);
+
+ Digit = (0x0030..0x0039) | (0x0660..0x0669) | (0x06F0..0x06F9) | (0x0966..0x096F) | (0x09E6..0x09EF) | (0x0A66..0x0A6F) | (0x0AE6..0x0AEF) | (0x0B66..0x0B6F) | (0x0BE7..0x0BEF) | (0x0C66..0x0C6F) | (0x0CE6..0x0CEF) | (0x0D66..0x0D6F) | (0x0E50..0x0E59) | (0x0ED0..0x0ED9) | (0x0F20..0x0F29);
+
+ CombiningChar = (0x0300..0x0345) | (0x0360..0x0361) | (0x0483..0x0486) | (0x0591..0x05A1) | (0x05A3..0x05B9) | (0x05BB..0x05BD) | 0x05BF | (0x05C1..0x05C2) | 0x05C4 | (0x064B..0x0652) | 0x0670 | (0x06D6..0x06DC) | (0x06DD..0x06DF) | (0x06E0..0x06E4) | (0x06E7..0x06E8) | (0x06EA..0x06ED) | (0x0901..0x0903) | 0x093C | (0x093E..0x094C) | 0x094D | (0x0951..0x0954) | (0x0962..0x0963) | (0x0981..0x0983) | 0x09BC | 0x09BE | 0x09BF | (0x09C0..0x09C4) | (0x09C7..0x09C8) | (0x09CB..0x09CD) | 0x09D7 | (0x09E2..0x09E3) | 0x0A02 | 0x0A3C | 0x0A3E | 0x0A3F | (0x0A40..0x0A42) | (0x0A47..0x0A48) | (0x0A4B..0x0A4D) | (0x0A70..0x0A71) | (0x0A81..0x0A83) | 0x0ABC | (0x0ABE..0x0AC5) | (0x0AC7..0x0AC9) | (0x0ACB..0x0ACD) | (0x0B01..0x0B03) | 0x0B3C | (0x0B3E..0x0B43) | (0x0B47..0x0B48) | (0x0B4B..0x0B4D) | (0x0B56..0x0B57) | (0x0B82..0x0B83) | (0x0BBE..0x0BC2) | (0x0BC6..0x0BC8) | (0x0BCA..0x0BCD) | 0x0BD7 | (0x0C01..0x0C03) | (0x0C3E..0x0C44) | (0x0C46..0x0C48) | (0x0C4A..0x0C4D) | (0x0C55..0x0C56) | (0x0C82..0x0C83) | (0x0CBE..0x0CC4) | (0x0CC6..0x0CC8) | (0x0CCA..0x0CCD) | (0x0CD5..0x0CD6) | (0x0D02..0x0D03) | (0x0D3E..0x0D43) | (0x0D46..0x0D48) | (0x0D4A..0x0D4D) | 0x0D57 | 0x0E31 | (0x0E34..0x0E3A) | (0x0E47..0x0E4E) | 0x0EB1 | (0x0EB4..0x0EB9) | (0x0EBB..0x0EBC) | (0x0EC8..0x0ECD) | (0x0F18..0x0F19) | 0x0F35 | 0x0F37 | 0x0F39 | 0x0F3E | 0x0F3F | (0x0F71..0x0F84) | (0x0F86..0x0F8B) | (0x0F90..0x0F95) | 0x0F97 | (0x0F99..0x0FAD) | (0x0FB1..0x0FB7) | 0x0FB9 | (0x20D0..0x20DC) | 0x20E1 | (0x302A..0x302F) | 0x3099 | 0x309A;
+
+ Ideographic = (0x4E00..0x9FA5) | 0x3007 | (0x3021..0x3029);
+
+ BaseChar = (0x0041..0x005A) | (0x0061..0x007A) | (0x00C0..0x00D6) | (0x00D8..0x00F6) | (0x00F8..0x00FF) | (0x0100..0x0131) | (0x0134..0x013E) | (0x0141..0x0148) | (0x014A..0x017E) | (0x0180..0x01C3) | (0x01CD..0x01F0) | (0x01F4..0x01F5) | (0x01FA..0x0217) | (0x0250..0x02A8) | (0x02BB..0x02C1) | 0x0386 | (0x0388..0x038A) | 0x038C | (0x038E..0x03A1) | (0x03A3..0x03CE) | (0x03D0..0x03D6) | 0x03DA | 0x03DC | 0x03DE | 0x03E0 | (0x03E2..0x03F3) | (0x0401..0x040C) | (0x040E..0x044F) | (0x0451..0x045C) | (0x045E..0x0481) | (0x0490..0x04C4) | (0x04C7..0x04C8) | (0x04CB..0x04CC) | (0x04D0..0x04EB) | (0x04EE..0x04F5) | (0x04F8..0x04F9) | (0x0531..0x0556) | 0x0559 | (0x0561..0x0586) | (0x05D0..0x05EA) | (0x05F0..0x05F2) | (0x0621..0x063A) | (0x0641..0x064A) | (0x0671..0x06B7) | (0x06BA..0x06BE) | (0x06C0..0x06CE) | (0x06D0..0x06D3) | 0x06D5 | (0x06E5..0x06E6) | (0x0905..0x0939) | 0x093D | (0x0958..0x0961) | (0x0985..0x098C) | (0x098F..0x0990) | (0x0993..0x09A8) | (0x09AA..0x09B0) | 0x09B2 | (0x09B6..0x09B9) | (0x09DC..0x09DD) | (0x09DF..0x09E1) | (0x09F0..0x09F1) | (0x0A05..0x0A0A) | (0x0A0F..0x0A10) | (0x0A13..0x0A28) | (0x0A2A..0x0A30) | (0x0A32..0x0A33) | (0x0A35..0x0A36) | (0x0A38..0x0A39) | (0x0A59..0x0A5C) | 0x0A5E | (0x0A72..0x0A74) | (0x0A85..0x0A8B) | 0x0A8D | (0x0A8F..0x0A91) | (0x0A93..0x0AA8) | (0x0AAA..0x0AB0) | (0x0AB2..0x0AB3) | (0x0AB5..0x0AB9) | 0x0ABD | 0x0AE0 | (0x0B05..0x0B0C) | (0x0B0F..0x0B10) | (0x0B13..0x0B28) | (0x0B2A..0x0B30) | (0x0B32..0x0B33) | (0x0B36..0x0B39) | 0x0B3D | (0x0B5C..0x0B5D) | (0x0B5F..0x0B61) | (0x0B85..0x0B8A) | (0x0B8E..0x0B90) | (0x0B92..0x0B95) | (0x0B99..0x0B9A) | 0x0B9C | (0x0B9E..0x0B9F) | (0x0BA3..0x0BA4) | (0x0BA8..0x0BAA) | (0x0BAE..0x0BB5) | (0x0BB7..0x0BB9) | (0x0C05..0x0C0C) | (0x0C0E..0x0C10) | (0x0C12..0x0C28) | (0x0C2A..0x0C33) | (0x0C35..0x0C39) | (0x0C60..0x0C61) | (0x0C85..0x0C8C) | (0x0C8E..0x0C90) | (0x0C92..0x0CA8) | (0x0CAA..0x0CB3) | (0x0CB5..0x0CB9) | 0x0CDE | (0x0CE0..0x0CE1) | (0x0D05..0x0D0C) | 
(0x0D0E..0x0D10) | (0x0D12..0x0D28) | (0x0D2A..0x0D39) | (0x0D60..0x0D61) | (0x0E01..0x0E2E) | 0x0E30 | (0x0E32..0x0E33) | (0x0E40..0x0E45) | (0x0E81..0x0E82) | 0x0E84 | (0x0E87..0x0E88) | 0x0E8A | 0x0E8D | (0x0E94..0x0E97) | (0x0E99..0x0E9F) | (0x0EA1..0x0EA3) | 0x0EA5 | 0x0EA7 | (0x0EAA..0x0EAB) | (0x0EAD..0x0EAE) | 0x0EB0 | (0x0EB2..0x0EB3) | 0x0EBD | (0x0EC0..0x0EC4) | (0x0F40..0x0F47) | (0x0F49..0x0F69) | (0x10A0..0x10C5) | (0x10D0..0x10F6) | 0x1100 | (0x1102..0x1103) | (0x1105..0x1107) | 0x1109 | (0x110B..0x110C) | (0x110E..0x1112) | 0x113C | 0x113E | 0x1140 | 0x114C | 0x114E | 0x1150 | (0x1154..0x1155) | 0x1159 | (0x115F..0x1161) | 0x1163 | 0x1165 | 0x1167 | 0x1169 | (0x116D..0x116E) | (0x1172..0x1173) | 0x1175 | 0x119E | 0x11A8 | 0x11AB | (0x11AE..0x11AF) | (0x11B7..0x11B8) | 0x11BA | (0x11BC..0x11C2) | 0x11EB | 0x11F0 | 0x11F9 | (0x1E00..0x1E9B) | (0x1EA0..0x1EF9) | (0x1F00..0x1F15) | (0x1F18..0x1F1D) | (0x1F20..0x1F45) | (0x1F48..0x1F4D) | (0x1F50..0x1F57) | 0x1F59 | 0x1F5B | 0x1F5D | (0x1F5F..0x1F7D) | (0x1F80..0x1FB4) | (0x1FB6..0x1FBC) | 0x1FBE | (0x1FC2..0x1FC4) | (0x1FC6..0x1FCC) | (0x1FD0..0x1FD3) | (0x1FD6..0x1FDB) | (0x1FE0..0x1FEC) | (0x1FF2..0x1FF4) | (0x1FF6..0x1FFC) | 0x2126 | (0x212A..0x212B) | 0x212E | (0x2180..0x2182) | (0x3041..0x3094) | (0x30A1..0x30FA) | (0x3105..0x312C) | (0xAC00..0xD7A3);
+
+ # Full Unicode 3.1 requires: Char = 0x9 | 0xA | 0xD | (0x20..0xD7FF) | (0xE000..0xFFFD) | (0x10000..0x10FFFF);
+
+ Char = 0x9 | 0xA | 0xD | (0x20..0xD7FF) | (0xE000..0xFFFD);
+
+ Letter = BaseChar | Ideographic;
+
+ NameChar = Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender;
+
+ include CommonXml "xmlcommon.rl";
+
+}%%
+
+ %% write data;
+
+ int XML::init( )
+ {
+ cur_char = 0; // the character index restarts for every parse
+ %% write init;
+ return 1; // unconditional: this function has no failure path
+ }
+
+ int XML::execute( const unsigned short *data, int len )
+ {
+ // Run the machine over len 16-bit units starting at data. p (cursor)
+ // and pe (one past the end) are the names the generated exec code uses.
+ const unsigned short *p = data, *pe = data + len;
+
+ %% write exec;
+
+ // Result: -1 in the error state, 1 in a final state, 0 otherwise.
+ if ( cs == XML_error )
+ return -1;
+ return ( cs >= XML_first_final ) ? 1 : 0;
+ }
+
+ int XML::finish( )
+ {
+ %% write eof;
+ // Result: -1 in the error state, 1 in a final state, 0 otherwise.
+ if ( cs == XML_error )
+ return -1;
+ // Final states are the ones numbered XML_first_final and above.
+ return ( cs >= XML_first_final ) ? 1 : 0;
+ }
+
+ int main()
+ {
+ return 0; // no input is parsed; NOTE(review): looks like a build-only test (the expected-output block below is empty) - confirm
+ }
+/* _____OUTPUT_____
+_____OUTPUT_____ */
diff --git a/test/xmlcommon.rl b/test/xmlcommon.rl
new file mode 100644
index 0000000..e7a855e
--- /dev/null
+++ b/test/xmlcommon.rl
@@ -0,0 +1,205 @@
+/*
+ * This file is included by xml.rl
+ *
+ * @IGNORE: yes
+ */
+
+%%{
+
+ #
+ # Common XML grammar rules based on the XML 1.0 BNF from:
+ # http://www.jelks.nu/XML/xmlebnf.html
+ #
+
+ machine CommonXml;
+
+ S = (0x20 | 0x9 | 0xD | 0xA)+;
+
+ # WAS PubidChar = 0x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%];
+ PubidChar = 0x20 | 0xD | 0xA | [a-zA-Z0-9] | [\-'()+,./:=?;!*#@$_%];
+
+ PubidLiteral = '"' PubidChar* '"' | "'" (PubidChar - "'")* "'";
+
+ Name = (Letter | '_' | ':') (NameChar)*;
+
+ Comment = '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->';
+
+ # Used strong subtraction operator, and replaced * with +. Ragel complained since using
+ # * results in a machine that accepts 0 length strings, and later it's only used in an
+ # optional construct anyway.
+ #
+ CharData_Old = [^<&]* - ([^<&]* ']]>' [^<&]*);
+ CharData = [^<&]+ -- ']]>';
+
+ SystemLiteral = ('"' [^"]* '"') | ("'" [^']* "'");
+
+ Eq = S? '=' S?;
+
+ VersionNum = ([a-zA-Z0-9_.:] | '-')+;
+
+ # WAS S 'version' Eq (' VersionNum ' | " VersionNum ") - fixed quotes
+ VersionInfo = S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"');
+
+ ExternalID = 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral;
+
+ PublicID = 'PUBLIC' S PubidLiteral;
+
+ NotationDecl = '<!NOTATION' S Name S (ExternalID | PublicID) S? '>';
+
+ EncName = [A-Za-z] ([A-Za-z0-9._] | '-')*;
+
+ EncodingDecl = S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" );
+
+ # UNUSED TextDecl = '<?xml' VersionInfo? EncodingDecl S? '?>';
+
+ NDataDecl = S 'NDATA' S Name;
+
+ PEReference = '%' Name ';';
+
+ EntityRef = '&' Name ';';
+
+ CharRef = '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'; # decimal and hexadecimal character references; the hex form starts with &#x (XML 1.0 production 66), it was '&0x' before
+
+ Reference = EntityRef | CharRef;
+
+ EntityValue = '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'";
+
+ PEDef = EntityValue | ExternalID;
+
+ EntityDef = EntityValue | (ExternalID NDataDecl?);
+
+ PEDecl = '<!ENTITY' S '%' S Name S PEDef S? '>';
+
+ GEDecl = '<!ENTITY' S Name S EntityDef S? '>';
+
+ EntityDecl = GEDecl | PEDecl;
+
+ Mixed = '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')';
+
+ # WAS cp = (Name | choice | seq) ('?' | '*' | '+')?;
+
+ # WAS seq = '(' S? cp ( S? ',' S? cp )* S? ')';
+
+ # WAS choice = '(' S? cp ( S? '|' S? cp )* S? ')';
+
+ # WAS children = (choice | seq) ('?' | '*' | '+')?;
+
+ # TODO put validation for this in and make it clearer
+ alt = '?' | '*' | '+';
+ children = '(' S? # loose stand-in for the choice/seq productions: outer parens around a run of tokens
+ ( ( Name alt? ) |
+ '(' | # nested groups are accepted token by token; balance is not validated (see TODO)
+ ( ')' alt? ) |
+ [,|] |
+ S )* # repeated: without the star only one token fits, so a model such as (a,b) was rejected
+ ')' alt?;
+
+ contentspec = 'EMPTY' | 'ANY' | Mixed | children;
+
+ elementdecl = '<!ELEMENT' S Name S contentspec S? '>';
+
+ AttValue = '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'";
+
+ Attribute = Name Eq AttValue;
+
+ Nmtoken = (NameChar)+;
+
+ # UNUSED Nmtokens = Nmtoken (S Nmtoken)*;
+
+ Enumeration = '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')';
+
+ NotationType = 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')';
+
+ EnumeratedType = NotationType | Enumeration;
+
+ TokenizedType = 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS';
+
+ StringType = 'CDATA';
+
+ AttType = StringType | TokenizedType | EnumeratedType;
+
+ DefaultDecl = '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue);
+
+ AttDef = S Name S AttType S DefaultDecl;
+
+ AttlistDecl = '<!ATTLIST' S Name AttDef* S? '>';
+
+ EmptyElemTag = '<' Name (S Attribute)* S? '/>';
+
+ ETag = '</' Name S? '>';
+
+ PITarget_Old = Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')); # plain subtraction: removes only the three-letter name xml, in any letter case
+ PITarget = Name -- "xml"i; # NOTE(review): strong subtraction removes every Name that contains xml (any case) anywhere, e.g. xml-stylesheet - confirm this is intended
+
+ PI = '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>';
+
+ markupdecl = elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment;
+
+ doctypedecl = '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>';
+
+ # TODO extSubsetDecl = ( markupdecl | conditionalSect | PEReference | S )*;
+ # UNUSED extSubsetDecl = ( markupdecl | PEReference | S )*;
+
+ # UNUSED extSubset = TextDecl? extSubsetDecl;
+
+ # UNUSED Ignore = Char* - (Char* ('<![' | ']]>') Char*);
+
+ # TODO: ignoreSectContents = Ignore ('<![' ignoreSectContents ']]>' Ignore)*;
+ # UNUSED ignoreSectContents = Ignore ('<![' ']]>' Ignore)*;
+
+ # UNUSED ignoreSect = '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>';
+
+ # UNUSED includeSect = '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>';
+
+ # UNUSED conditionalSect = includeSect | ignoreSect;
+
+ STag = '<' Name (S Attribute)* S? '>';
+
+ CDStart = '<![CDATA[';
+
+ CDEnd = ']]>';
+
+ # WAS CData = (Char* - (Char* ']]>' Char*));
+ CData = (Char* -- CDEnd);
+
+ CDSect = CDStart CData CDEnd;
+
+ # UNUSED Subcode = ([a-z] | [A-Z])+;
+
+ # UNUSED UserCode = ('x' | 'X') '-' ([a-z] | [A-Z])+;
+
+ # UNUSED IanaCode = ('i' | 'I') '-' ([a-z] | [A-Z])+;
+
+ # UNUSED ISO639Code = ([a-z] | [A-Z]) ([a-z] | [A-Z]);
+
+ # UNUSED Langcode = ISO639Code | IanaCode | UserCode;
+
+ # UNUSED LanguageID = Langcode ('-' Subcode)*;
+
+ SDDecl = S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'));
+
+ # UNUSED extPE = TextDecl? extSubsetDecl;
+
+ Misc = Comment | PI | S;
+
+ XMLDecl = '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>';
+
+ prolog = XMLDecl? Misc* (doctypedecl Misc*)?;
+
+ # UNUSED Names = Name (S Name)*;
+
+ # Added fcall - TODO check logic is correct
+ # UNUSED extParsedEnt = TextDecl? @{fcall content;};
+
+ # TODO tag stack validation
+
+ # WAS element = EmptyElemTag | STag content ETag
+ # WAS content = (element | CharData | Reference | CDSect | PI | Comment)*;
+ content = (EmptyElemTag | STag | ETag | CharData | Reference | CDSect | PI | Comment)*;
+
+ # WAS document = prolog element Misc*;
+ document = prolog ( EmptyElemTag | ( STag content ETag ) ) Misc*;
+
+ main := document;
+
+}%%
diff --git a/version.mk b/version.mk
new file mode 100644
index 0000000..0193c3d
--- /dev/null
+++ b/version.mk
@@ -0,0 +1,2 @@
+VERSION = 5.16
+PUBDATE = November 2006